feat(db): Remove document summary LLM schema

This commit is contained in:
Anish Sarkar 2026-06-04 00:48:53 +05:30
parent e68b3f9532
commit 290a9539ef
9 changed files with 137 additions and 67 deletions

View file

@ -0,0 +1,134 @@
"""remove document summary llm settings
Revision ID: 154
Revises: 153
"""
from collections.abc import Sequence
import sqlalchemy as sa
from alembic import op
# Alembic revision identifiers: this migration is "154" and applies on top of "153".
revision: str = "154"
down_revision: str | None = "153"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
# Name of the PostgreSQL publication whose table list is rewritten by this migration.
PUBLICATION_NAME = "zero_publication"
# Columns of ``documents`` that are listed explicitly in the publication.
# Kept as a list because it is concatenated with another list when the
# publication DDL is built.
DOCUMENT_COLS = [
    "id",
    "title",
    "document_type",
    "search_space_id",
    "folder_id",
    "created_by_id",
    "status",
    "created_at",
    "updated_at",
]
# Columns of ``"user"`` that are listed explicitly in the publication.
# Kept as a list because it is concatenated with another list when the
# publication DDL is built.
USER_COLS = [
    "id",
    "pages_limit",
    "pages_used",
    "premium_credit_micros_limit",
    "premium_credit_micros_used",
]
# Columns of ``automation_runs`` that are listed explicitly in the publication.
AUTOMATION_RUN_COLS = [
    "id",
    "automation_id",
    "trigger_id",
    "status",
    "step_results",
    "started_at",
    "finished_at",
    "created_at",
]
def _column_exists(conn, table: str, column: str) -> bool:
    """Return whether ``table`` has a column named ``column``.

    The lookup goes through ``information_schema.columns`` and is limited to
    the schemas on the connection's effective search path. Without that
    filter, a same-named table in an unrelated schema would make this report
    a column that the unqualified DDL issued by this migration cannot see.

    Args:
        conn: Connection used to run the query.
        table: Unqualified table name.
        column: Column name to look for.

    Returns:
        True if at least one matching row exists, False otherwise.
    """
    row = conn.execute(
        sa.text(
            "SELECT 1 FROM information_schema.columns "
            # current_schemas(false) = search-path schemas, excluding
            # implicitly searched system schemas.
            "WHERE table_schema = ANY (current_schemas(false)) "
            "AND table_name = :table AND column_name = :column"
        ),
        {"table": table, "column": column},
    ).fetchone()
    return row is not None
def _has_zero_version(conn, table: str) -> bool:
    """Report whether ``table`` currently has a ``_0_version`` column."""
    version_column = "_0_version"
    return _column_exists(conn, table, version_column)
def _set_table_ddl(conn) -> str:
    """Build the ``ALTER PUBLICATION ... SET TABLE`` statement.

    ``documents``, ``"user"`` and ``automation_runs`` are published with an
    explicit column list; the remaining tables are published whole. For
    ``documents`` and ``"user"`` the quoted ``"_0_version"`` column is
    appended to the list when the table currently has it.

    Args:
        conn: Connection used to probe for the ``_0_version`` columns.

    Returns:
        The complete DDL statement as a string.
    """

    def _column_list(table: str, base_cols: list[str]) -> str:
        # Copy so the module-level list is never mutated.
        cols = list(base_cols)
        if _has_zero_version(conn, table):
            cols.append('"_0_version"')
        return ", ".join(cols)

    # Probe order matches the order the tables appear in: documents, then user.
    document_columns = _column_list("documents", DOCUMENT_COLS)
    user_columns = _column_list("user", USER_COLS)
    automation_columns = ", ".join(AUTOMATION_RUN_COLS)

    published = (
        "notifications",
        "documents (" + document_columns + ")",
        "folders",
        "search_source_connectors",
        "new_chat_messages",
        "chat_comments",
        "chat_session_state",
        '"user" (' + user_columns + ")",
        "automation_runs (" + automation_columns + ")",
    )
    table_clause = ", ".join(published)
    return f"ALTER PUBLICATION {PUBLICATION_NAME} SET TABLE " + table_clause
def _resync_zero_publication(tag: str) -> None:
    """Re-issue the publication's table list, bracketed by two comments.

    Does nothing when the publication named by ``PUBLICATION_NAME`` is not
    present in ``pg_publication``. Otherwise it runs three statements inside
    a single transaction scope: a ``COMMENT ON PUBLICATION`` with
    ``pre-<tag>``, the ``SET TABLE`` statement, and a second comment with
    ``post-<tag>``.

    Args:
        tag: Label embedded in the two publication comments.
    """
    bind = op.get_bind()

    publication_row = bind.execute(
        sa.text("SELECT 1 FROM pg_publication WHERE pubname = :name"),
        {"name": PUBLICATION_NAME},
    ).fetchone()
    if not publication_row:
        return

    # Nest inside the caller's transaction when one is open; otherwise start one.
    if bind.in_transaction():
        scope = bind.begin_nested()
    else:
        scope = bind.begin()

    before_comment = f"COMMENT ON PUBLICATION {PUBLICATION_NAME} IS 'pre-{tag}'"
    after_comment = f"COMMENT ON PUBLICATION {PUBLICATION_NAME} IS 'post-{tag}'"

    with scope:
        bind.execute(sa.text(before_comment))
        # The DDL is built here, after the first comment, because building it
        # queries the database for the "_0_version" columns.
        bind.execute(sa.text(_set_table_ddl(bind)))
        bind.execute(sa.text(after_comment))
def upgrade() -> None:
    """Drop the document-summary columns, then resync the publication.

    Each column is dropped only if it is still present, so running against a
    database where it is already gone does not fail on the drop.
    """
    bind = op.get_bind()

    obsolete_columns = (
        ("searchspaces", "document_summary_llm_id"),
        ("search_source_connectors", "enable_summary"),
    )
    for table_name, column_name in obsolete_columns:
        if _column_exists(bind, table_name, column_name):
            op.drop_column(table_name, column_name)

    _resync_zero_publication("154-summary-removal")
def downgrade() -> None:
    """Re-create the document-summary columns, then resync the publication.

    Each column is added only if it is absent. ``document_summary_llm_id``
    comes back as a nullable integer with server default ``"0"``;
    ``enable_summary`` comes back as a non-nullable boolean with server
    default ``false``.
    """
    bind = op.get_bind()

    summary_llm_present = _column_exists(
        bind, "searchspaces", "document_summary_llm_id"
    )
    if not summary_llm_present:
        summary_llm_column = sa.Column(
            "document_summary_llm_id",
            sa.Integer(),
            nullable=True,
            server_default="0",
        )
        op.add_column("searchspaces", summary_llm_column)

    enable_summary_present = _column_exists(
        bind, "search_source_connectors", "enable_summary"
    )
    if not enable_summary_present:
        enable_summary_column = sa.Column(
            "enable_summary",
            sa.Boolean(),
            nullable=False,
            server_default=sa.text("false"),
        )
        op.add_column("search_source_connectors", enable_summary_column)

    _resync_zero_publication("154-summary-removal-downgrade")

View file

@ -1781,9 +1781,6 @@ class SearchSpace(BaseModel, TimestampMixin):
agent_llm_id = Column( agent_llm_id = Column(
Integer, nullable=True, default=0 Integer, nullable=True, default=0
) # For agent/chat operations, defaults to Auto mode ) # For agent/chat operations, defaults to Auto mode
document_summary_llm_id = Column(
Integer, nullable=True, default=0
) # For document summarization, defaults to Auto mode
image_generation_config_id = Column( image_generation_config_id = Column(
Integer, nullable=True, default=0 Integer, nullable=True, default=0
) # For image generation, defaults to Auto mode ) # For image generation, defaults to Auto mode
@ -1951,12 +1948,6 @@ class SearchSourceConnector(BaseModel, TimestampMixin):
last_indexed_at = Column(TIMESTAMP(timezone=True), nullable=True) last_indexed_at = Column(TIMESTAMP(timezone=True), nullable=True)
config = Column(JSON, nullable=False) config = Column(JSON, nullable=False)
# Summary generation (LLM-based) - disabled by default to save resources.
# When enabled, improves hybrid search quality at the cost of LLM calls.
enable_summary = Column(
Boolean, nullable=False, default=False, server_default="false"
)
# Vision LLM for image files - disabled by default to save cost/time. # Vision LLM for image files - disabled by default to save cost/time.
# When enabled, images are described via a vision language model instead # When enabled, images are described via a vision language model instead
# of falling back to the document parser. # of falling back to the document parser.

View file

@ -617,9 +617,6 @@ async def get_llm_preferences(
# Get full config objects for each role # Get full config objects for each role
agent_llm = await _get_llm_config_by_id(session, search_space.agent_llm_id) agent_llm = await _get_llm_config_by_id(session, search_space.agent_llm_id)
document_summary_llm = await _get_llm_config_by_id(
session, search_space.document_summary_llm_id
)
image_generation_config = await _get_image_gen_config_by_id( image_generation_config = await _get_image_gen_config_by_id(
session, search_space.image_generation_config_id session, search_space.image_generation_config_id
) )
@ -629,11 +626,9 @@ async def get_llm_preferences(
return LLMPreferencesRead( return LLMPreferencesRead(
agent_llm_id=search_space.agent_llm_id, agent_llm_id=search_space.agent_llm_id,
document_summary_llm_id=search_space.document_summary_llm_id,
image_generation_config_id=search_space.image_generation_config_id, image_generation_config_id=search_space.image_generation_config_id,
vision_llm_config_id=search_space.vision_llm_config_id, vision_llm_config_id=search_space.vision_llm_config_id,
agent_llm=agent_llm, agent_llm=agent_llm,
document_summary_llm=document_summary_llm,
image_generation_config=image_generation_config, image_generation_config=image_generation_config,
vision_llm_config=vision_llm_config, vision_llm_config=vision_llm_config,
) )
@ -707,9 +702,6 @@ async def update_llm_preferences(
# Get full config objects for response # Get full config objects for response
agent_llm = await _get_llm_config_by_id(session, search_space.agent_llm_id) agent_llm = await _get_llm_config_by_id(session, search_space.agent_llm_id)
document_summary_llm = await _get_llm_config_by_id(
session, search_space.document_summary_llm_id
)
image_generation_config = await _get_image_gen_config_by_id( image_generation_config = await _get_image_gen_config_by_id(
session, search_space.image_generation_config_id session, search_space.image_generation_config_id
) )
@ -719,11 +711,9 @@ async def update_llm_preferences(
return LLMPreferencesRead( return LLMPreferencesRead(
agent_llm_id=search_space.agent_llm_id, agent_llm_id=search_space.agent_llm_id,
document_summary_llm_id=search_space.document_summary_llm_id,
image_generation_config_id=search_space.image_generation_config_id, image_generation_config_id=search_space.image_generation_config_id,
vision_llm_config_id=search_space.vision_llm_config_id, vision_llm_config_id=search_space.vision_llm_config_id,
agent_llm=agent_llm, agent_llm=agent_llm,
document_summary_llm=document_summary_llm,
image_generation_config=image_generation_config, image_generation_config=image_generation_config,
vision_llm_config=vision_llm_config, vision_llm_config=vision_llm_config,
) )

View file

@ -221,9 +221,6 @@ class LLMPreferencesRead(BaseModel):
agent_llm_id: int | None = Field( agent_llm_id: int | None = Field(
None, description="ID of the LLM config to use for agent/chat tasks" None, description="ID of the LLM config to use for agent/chat tasks"
) )
document_summary_llm_id: int | None = Field(
None, description="ID of the LLM config to use for document summarization"
)
image_generation_config_id: int | None = Field( image_generation_config_id: int | None = Field(
None, description="ID of the image generation config to use" None, description="ID of the image generation config to use"
) )
@ -234,9 +231,6 @@ class LLMPreferencesRead(BaseModel):
agent_llm: dict[str, Any] | None = Field( agent_llm: dict[str, Any] | None = Field(
None, description="Full config for agent LLM" None, description="Full config for agent LLM"
) )
document_summary_llm: dict[str, Any] | None = Field(
None, description="Full config for document summary LLM"
)
image_generation_config: dict[str, Any] | None = Field( image_generation_config: dict[str, Any] | None = Field(
None, description="Full config for image generation" None, description="Full config for image generation"
) )
@ -253,9 +247,6 @@ class LLMPreferencesUpdate(BaseModel):
agent_llm_id: int | None = Field( agent_llm_id: int | None = Field(
None, description="ID of the LLM config to use for agent/chat tasks" None, description="ID of the LLM config to use for agent/chat tasks"
) )
document_summary_llm_id: int | None = Field(
None, description="ID of the LLM config to use for document summarization"
)
image_generation_config_id: int | None = Field( image_generation_config_id: int | None = Field(
None, description="ID of the image generation config to use" None, description="ID of the image generation config to use"
) )

View file

@ -16,7 +16,6 @@ class SearchSourceConnectorBase(BaseModel):
is_indexable: bool is_indexable: bool
last_indexed_at: datetime | None = None last_indexed_at: datetime | None = None
config: dict[str, Any] config: dict[str, Any]
enable_summary: bool = False
enable_vision_llm: bool = False enable_vision_llm: bool = False
periodic_indexing_enabled: bool = False periodic_indexing_enabled: bool = False
indexing_frequency_minutes: int | None = None indexing_frequency_minutes: int | None = None
@ -67,7 +66,6 @@ class SearchSourceConnectorUpdate(BaseModel):
is_indexable: bool | None = None is_indexable: bool | None = None
last_indexed_at: datetime | None = None last_indexed_at: datetime | None = None
config: dict[str, Any] | None = None config: dict[str, Any] | None = None
enable_summary: bool | None = None
enable_vision_llm: bool | None = None enable_vision_llm: bool | None = None
periodic_indexing_enabled: bool | None = None periodic_indexing_enabled: bool | None = None
indexing_frequency_minutes: int | None = None indexing_frequency_minutes: int | None = None

View file

@ -68,7 +68,6 @@ def _is_interactive_auth_provider(
class LLMRole: class LLMRole:
AGENT = "agent" # For agent/chat operations AGENT = "agent" # For agent/chat operations
DOCUMENT_SUMMARY = "document_summary" # For document summarization
def get_global_llm_config(llm_config_id: int) -> dict | None: def get_global_llm_config(llm_config_id: int) -> dict | None:
@ -266,7 +265,7 @@ async def get_search_space_llm_instance(
Args: Args:
session: Database session session: Database session
search_space_id: Search Space ID search_space_id: Search Space ID
role: LLM role ('agent' or 'document_summary') role: LLM role ('agent')
Returns: Returns:
ChatLiteLLM or ChatLiteLLMRouter instance, or None if not found ChatLiteLLM or ChatLiteLLMRouter instance, or None if not found
@ -283,11 +282,8 @@ async def get_search_space_llm_instance(
return None return None
# Get the appropriate LLM config ID based on role # Get the appropriate LLM config ID based on role
llm_config_id = None
if role == LLMRole.AGENT: if role == LLMRole.AGENT:
llm_config_id = search_space.agent_llm_id llm_config_id = search_space.agent_llm_id
elif role == LLMRole.DOCUMENT_SUMMARY:
llm_config_id = search_space.document_summary_llm_id
else: else:
logger.error(f"Invalid LLM role: {role}") logger.error(f"Invalid LLM role: {role}")
return None return None
@ -470,20 +466,13 @@ async def get_search_space_llm_instance(
async def get_agent_llm( async def get_agent_llm(
session: AsyncSession, search_space_id: int
) -> ChatLiteLLM | ChatLiteLLMRouter | None:
"""Get the search space's agent LLM instance for chat operations."""
return await get_search_space_llm_instance(session, search_space_id, LLMRole.AGENT)
async def get_document_summary_llm(
session: AsyncSession, search_space_id: int, disable_streaming: bool = False session: AsyncSession, search_space_id: int, disable_streaming: bool = False
) -> ChatLiteLLM | ChatLiteLLMRouter | None: ) -> ChatLiteLLM | ChatLiteLLMRouter | None:
"""Get the search space's document summary LLM instance.""" """Get the search space's agent LLM instance for chat operations."""
return await get_search_space_llm_instance( return await get_search_space_llm_instance(
session, session,
search_space_id, search_space_id,
LLMRole.DOCUMENT_SUMMARY, LLMRole.AGENT,
disable_streaming=disable_streaming, disable_streaming=disable_streaming,
) )
@ -645,22 +634,6 @@ async def get_vision_llm(
return None return None
# Backward-compatible alias (LLM preferences are now per-search-space, not per-user)
async def get_user_long_context_llm(
session: AsyncSession,
user_id: str,
search_space_id: int,
disable_streaming: bool = False,
) -> ChatLiteLLM | ChatLiteLLMRouter | None:
"""
Deprecated: Use get_document_summary_llm instead.
The user_id parameter is ignored as LLM preferences are now per-search-space.
"""
return await get_document_summary_llm(
session, search_space_id, disable_streaming=disable_streaming
)
def get_planner_llm() -> ChatLiteLLM | None: def get_planner_llm() -> ChatLiteLLM | None:
"""Return a planner LLM instance from the first global config marked """Return a planner LLM instance from the first global config marked
``is_planner: true``, or ``None`` if no planner config is defined. ``is_planner: true``, or ``None`` if no planner config is defined.

View file

@ -43,7 +43,6 @@ export const searchSourceConnector = z.object({
is_active: z.boolean().default(true), is_active: z.boolean().default(true),
last_indexed_at: z.string().nullable(), last_indexed_at: z.string().nullable(),
config: z.record(z.string(), z.any()), config: z.record(z.string(), z.any()),
enable_summary: z.boolean().default(false),
enable_vision_llm: z.boolean().default(false), enable_vision_llm: z.boolean().default(false),
periodic_indexing_enabled: z.boolean(), periodic_indexing_enabled: z.boolean(),
indexing_frequency_minutes: z.number().nullable(), indexing_frequency_minutes: z.number().nullable(),
@ -98,7 +97,6 @@ export const createConnectorRequest = z.object({
is_active: true, is_active: true,
last_indexed_at: true, last_indexed_at: true,
config: true, config: true,
enable_summary: true,
enable_vision_llm: true, enable_vision_llm: true,
periodic_indexing_enabled: true, periodic_indexing_enabled: true,
indexing_frequency_minutes: true, indexing_frequency_minutes: true,
@ -124,7 +122,6 @@ export const updateConnectorRequest = z.object({
is_active: true, is_active: true,
last_indexed_at: true, last_indexed_at: true,
config: true, config: true,
enable_summary: true,
enable_vision_llm: true, enable_vision_llm: true,
periodic_indexing_enabled: true, periodic_indexing_enabled: true,
indexing_frequency_minutes: true, indexing_frequency_minutes: true,

View file

@ -384,11 +384,9 @@ export const getGlobalVisionLLMConfigsResponse = z.array(globalVisionLLMConfig);
export const llmPreferences = z.object({ export const llmPreferences = z.object({
agent_llm_id: z.union([z.number(), z.null()]).optional(), agent_llm_id: z.union([z.number(), z.null()]).optional(),
document_summary_llm_id: z.union([z.number(), z.null()]).optional(),
image_generation_config_id: z.union([z.number(), z.null()]).optional(), image_generation_config_id: z.union([z.number(), z.null()]).optional(),
vision_llm_config_id: z.union([z.number(), z.null()]).optional(), vision_llm_config_id: z.union([z.number(), z.null()]).optional(),
agent_llm: z.union([z.record(z.string(), z.unknown()), z.null()]).optional(), agent_llm: z.union([z.record(z.string(), z.unknown()), z.null()]).optional(),
document_summary_llm: z.union([z.record(z.string(), z.unknown()), z.null()]).optional(),
image_generation_config: z.union([z.record(z.string(), z.unknown()), z.null()]).optional(), image_generation_config: z.union([z.record(z.string(), z.unknown()), z.null()]).optional(),
vision_llm_config: z.union([z.record(z.string(), z.unknown()), z.null()]).optional(), vision_llm_config: z.union([z.record(z.string(), z.unknown()), z.null()]).optional(),
}); });
@ -409,7 +407,6 @@ export const updateLLMPreferencesRequest = z.object({
search_space_id: z.number(), search_space_id: z.number(),
data: llmPreferences.pick({ data: llmPreferences.pick({
agent_llm_id: true, agent_llm_id: true,
document_summary_llm_id: true,
image_generation_config_id: true, image_generation_config_id: true,
vision_llm_config_id: true, vision_llm_config_id: true,
}), }),

View file

@ -21,7 +21,6 @@ export const searchSourceConnectorTable = table("search_source_connectors")
isIndexable: boolean().from("is_indexable"), isIndexable: boolean().from("is_indexable"),
lastIndexedAt: number().optional().from("last_indexed_at"), lastIndexedAt: number().optional().from("last_indexed_at"),
config: json(), config: json(),
enableSummary: boolean().from("enable_summary"),
periodicIndexingEnabled: boolean().from("periodic_indexing_enabled"), periodicIndexingEnabled: boolean().from("periodic_indexing_enabled"),
indexingFrequencyMinutes: number().optional().from("indexing_frequency_minutes"), indexingFrequencyMinutes: number().optional().from("indexing_frequency_minutes"),
nextScheduledAt: number().optional().from("next_scheduled_at"), nextScheduledAt: number().optional().from("next_scheduled_at"),