feat(db): Remove document summary LLM schema

2026-07-22 23:31:12 +02:00 · 2026-06-04 00:48:53 +05:30 · 2026-06-04 00:48:53 +05:30 · 290a9539ef
commit 290a9539ef
parent e68b3f9532
9 changed files with 137 additions and 67 deletions
--- a/surfsense_backend/alembic/versions/154_remove_document_summary_llm.py
+++ b/surfsense_backend/alembic/versions/154_remove_document_summary_llm.py
@ -0,0 +1,134 @@
 """remove document summary llm settings
 Revision ID: 154
 Revises: 153
 """
 from collections.abc import Sequence
 import sqlalchemy as sa
 from alembic import op
 # Alembic revision identifiers: this migration is "154" and follows "153".
 revision: str = "154"
 down_revision: str | None = "153"
 branch_labels: str | Sequence[str] | None = None
 depends_on: str | Sequence[str] | None = None
 # Name of the Postgres publication that _resync_zero_publication re-declares.
 PUBLICATION_NAME = "zero_publication"
 # Columns of "documents" placed in the publication's column list by
 # _set_table_ddl (a "_0_version" column is appended there when it exists).
 DOCUMENT_COLS = [
    "id",
    "title",
    "document_type",
    "search_space_id",
    "folder_id",
    "created_by_id",
    "status",
    "created_at",
    "updated_at",
 ]
 # Columns of "user" placed in the publication's column list by
 # _set_table_ddl (a "_0_version" column is appended there when it exists).
 USER_COLS = [
    "id",
    "pages_limit",
    "pages_used",
    "premium_credit_micros_limit",
    "premium_credit_micros_used",
 ]
 # Columns of "automation_runs" placed in the publication's column list by
 # _set_table_ddl; used as-is, with no "_0_version" handling.
 AUTOMATION_RUN_COLS = [
    "id",
    "automation_id",
    "trigger_id",
    "status",
    "step_results",
    "started_at",
    "finished_at",
    "created_at",
 ]
 def _column_exists(conn, table: str, column: str) -> bool:
    """Return True if ``table`` has a column named ``column``.

    The lookup is limited to schemas on the connection's current search path.
    ``information_schema.columns`` lists tables from every schema the role can
    see, so without that filter a same-named table in an unrelated schema
    would produce a false positive and the caller would try to drop (or skip
    re-adding) a column on the wrong basis.

    Args:
        conn: Connection used to run the catalog query.
        table: Unqualified table name.
        column: Column name to look for.

    Returns:
        True when at least one matching row exists, otherwise False.
    """
    row = conn.execute(
        sa.text(
            "SELECT 1 FROM information_schema.columns "
            "WHERE table_schema = ANY (current_schemas(false)) "
            "AND table_name = :table AND column_name = :column"
        ),
        {"table": table, "column": column},
    ).fetchone()
    return row is not None
 def _has_zero_version(conn, table: str) -> bool:
    """Return whether ``table`` currently has a ``_0_version`` column."""
    zero_version_present = _column_exists(conn, table, "_0_version")
    return zero_version_present
 def _set_table_ddl(conn) -> str:
    """Build the ``ALTER PUBLICATION ... SET TABLE`` statement.

    ``documents``, ``"user"`` and ``automation_runs`` are published with an
    explicit column list; the other tables are listed by name only. For
    ``documents`` and ``"user"`` the quoted ``"_0_version"`` column is added
    to the list when that column exists on the table.

    Args:
        conn: Connection used to probe for the ``_0_version`` columns.

    Returns:
        The complete DDL statement as a string.
    """
    zero_version_col = '"_0_version"'

    document_columns = list(DOCUMENT_COLS)
    if _has_zero_version(conn, "documents"):
        document_columns.append(zero_version_col)

    user_columns = list(USER_COLS)
    if _has_zero_version(conn, "user"):
        user_columns.append(zero_version_col)

    document_list = ", ".join(document_columns)
    user_list = ", ".join(user_columns)
    automation_run_list = ", ".join(AUTOMATION_RUN_COLS)

    members = ", ".join(
        (
            "notifications",
            f"documents ({document_list})",
            "folders",
            "search_source_connectors",
            "new_chat_messages",
            "chat_comments",
            "chat_session_state",
            f'"user" ({user_list})',
            f"automation_runs ({automation_run_list})",
        )
    )
    return f"ALTER PUBLICATION {PUBLICATION_NAME} SET TABLE {members}"
 def _resync_zero_publication(tag: str) -> None:
    """Re-declare the table set of the publication, if the publication exists.

    Does nothing when no publication named ``PUBLICATION_NAME`` is present.
    Otherwise, inside a transaction (a savepoint when the connection is
    already in one), it sets the publication comment to ``pre-<tag>``,
    issues the ``SET TABLE`` statement, then sets the comment to
    ``post-<tag>``.

    NOTE(review): the comment writes look like markers bracketing the change
    for a downstream consumer of the publication - confirm.

    Args:
        tag: Label embedded in the pre/post publication comments.
    """
    bind = op.get_bind()
    publication_row = bind.execute(
        sa.text("SELECT 1 FROM pg_publication WHERE pubname = :name"),
        {"name": PUBLICATION_NAME},
    ).fetchone()
    if publication_row is None:
        return

    if bind.in_transaction():
        scope = bind.begin_nested()
    else:
        scope = bind.begin()

    with scope:
        pre_marker = f"COMMENT ON PUBLICATION {PUBLICATION_NAME} IS 'pre-{tag}'"
        bind.execute(sa.text(pre_marker))
        # Build the DDL only after the pre-marker has been written, as before.
        set_table_statement = _set_table_ddl(bind)
        bind.execute(sa.text(set_table_statement))
        post_marker = f"COMMENT ON PUBLICATION {PUBLICATION_NAME} IS 'post-{tag}'"
        bind.execute(sa.text(post_marker))
 def upgrade() -> None:
    """Drop the document-summary columns when present, then resync the publication."""
    bind = op.get_bind()
    obsolete_columns = (
        ("searchspaces", "document_summary_llm_id"),
        ("search_source_connectors", "enable_summary"),
    )
    # Each column is checked first so the migration can be re-run safely.
    for table_name, column_name in obsolete_columns:
        if _column_exists(bind, table_name, column_name):
            op.drop_column(table_name, column_name)
    _resync_zero_publication("154-summary-removal")
 def downgrade() -> None:
    """Re-add the document-summary columns when missing, then resync the publication.

    ``searchspaces.document_summary_llm_id`` comes back as a nullable integer
    with server default ``0``; ``search_source_connectors.enable_summary``
    comes back as a non-nullable boolean with server default ``false``.
    """
    bind = op.get_bind()

    summary_llm_present = _column_exists(bind, "searchspaces", "document_summary_llm_id")
    if not summary_llm_present:
        summary_llm_column = sa.Column(
            "document_summary_llm_id",
            sa.Integer(),
            nullable=True,
            server_default="0",
        )
        op.add_column("searchspaces", summary_llm_column)

    enable_summary_present = _column_exists(bind, "search_source_connectors", "enable_summary")
    if not enable_summary_present:
        enable_summary_column = sa.Column(
            "enable_summary",
            sa.Boolean(),
            nullable=False,
            server_default=sa.text("false"),
        )
        op.add_column("search_source_connectors", enable_summary_column)

    _resync_zero_publication("154-summary-removal-downgrade")
--- a/surfsense_backend/app/db.py
+++ b/surfsense_backend/app/db.py
@ -1781,9 +1781,6 @@ class SearchSpace(BaseModel, TimestampMixin):
    agent_llm_id = Column(
        Integer, nullable=True, default=0
    )  # For agent/chat operations, defaults to Auto mode
    document_summary_llm_id = Column(
        Integer, nullable=True, default=0
    )  # For document summarization, defaults to Auto mode
    image_generation_config_id = Column(
        Integer, nullable=True, default=0
    )  # For image generation, defaults to Auto mode
@ -1951,12 +1948,6 @@ class SearchSourceConnector(BaseModel, TimestampMixin):
    last_indexed_at = Column(TIMESTAMP(timezone=True), nullable=True)
    config = Column(JSON, nullable=False)
    # Summary generation (LLM-based) - disabled by default to save resources.
    # When enabled, improves hybrid search quality at the cost of LLM calls.
    enable_summary = Column(
        Boolean, nullable=False, default=False, server_default="false"
    )
    # Vision LLM for image files - disabled by default to save cost/time.
    # When enabled, images are described via a vision language model instead
    # of falling back to the document parser.
--- a/surfsense_backend/app/routes/search_spaces_routes.py
+++ b/surfsense_backend/app/routes/search_spaces_routes.py
@ -617,9 +617,6 @@ async def get_llm_preferences(
        # Get full config objects for each role
        agent_llm = await _get_llm_config_by_id(session, search_space.agent_llm_id)
        document_summary_llm = await _get_llm_config_by_id(
            session, search_space.document_summary_llm_id
        )
        image_generation_config = await _get_image_gen_config_by_id(
            session, search_space.image_generation_config_id
        )
@ -629,11 +626,9 @@ async def get_llm_preferences(
        return LLMPreferencesRead(
            agent_llm_id=search_space.agent_llm_id,
            document_summary_llm_id=search_space.document_summary_llm_id,
            image_generation_config_id=search_space.image_generation_config_id,
            vision_llm_config_id=search_space.vision_llm_config_id,
            agent_llm=agent_llm,
            document_summary_llm=document_summary_llm,
            image_generation_config=image_generation_config,
            vision_llm_config=vision_llm_config,
        )
@ -707,9 +702,6 @@ async def update_llm_preferences(
        # Get full config objects for response
        agent_llm = await _get_llm_config_by_id(session, search_space.agent_llm_id)
        document_summary_llm = await _get_llm_config_by_id(
            session, search_space.document_summary_llm_id
        )
        image_generation_config = await _get_image_gen_config_by_id(
            session, search_space.image_generation_config_id
        )
@ -719,11 +711,9 @@ async def update_llm_preferences(
        return LLMPreferencesRead(
            agent_llm_id=search_space.agent_llm_id,
            document_summary_llm_id=search_space.document_summary_llm_id,
            image_generation_config_id=search_space.image_generation_config_id,
            vision_llm_config_id=search_space.vision_llm_config_id,
            agent_llm=agent_llm,
            document_summary_llm=document_summary_llm,
            image_generation_config=image_generation_config,
            vision_llm_config=vision_llm_config,
        )
--- a/surfsense_backend/app/schemas/new_llm_config.py
+++ b/surfsense_backend/app/schemas/new_llm_config.py
@ -221,9 +221,6 @@ class LLMPreferencesRead(BaseModel):
    agent_llm_id: int | None = Field(
        None, description="ID of the LLM config to use for agent/chat tasks"
    )
    document_summary_llm_id: int | None = Field(
        None, description="ID of the LLM config to use for document summarization"
    )
    image_generation_config_id: int | None = Field(
        None, description="ID of the image generation config to use"
    )
@ -234,9 +231,6 @@ class LLMPreferencesRead(BaseModel):
    agent_llm: dict[str, Any] | None = Field(
        None, description="Full config for agent LLM"
    )
    document_summary_llm: dict[str, Any] | None = Field(
        None, description="Full config for document summary LLM"
    )
    image_generation_config: dict[str, Any] | None = Field(
        None, description="Full config for image generation"
    )
@ -253,9 +247,6 @@ class LLMPreferencesUpdate(BaseModel):
    agent_llm_id: int | None = Field(
        None, description="ID of the LLM config to use for agent/chat tasks"
    )
    document_summary_llm_id: int | None = Field(
        None, description="ID of the LLM config to use for document summarization"
    )
    image_generation_config_id: int | None = Field(
        None, description="ID of the image generation config to use"
    )
--- a/surfsense_backend/app/schemas/search_source_connector.py
+++ b/surfsense_backend/app/schemas/search_source_connector.py
@ -16,7 +16,6 @@ class SearchSourceConnectorBase(BaseModel):
    is_indexable: bool
    last_indexed_at: datetime | None = None
    config: dict[str, Any]
    enable_summary: bool = False
    enable_vision_llm: bool = False
    periodic_indexing_enabled: bool = False
    indexing_frequency_minutes: int | None = None
@ -67,7 +66,6 @@ class SearchSourceConnectorUpdate(BaseModel):
    is_indexable: bool | None = None
    last_indexed_at: datetime | None = None
    config: dict[str, Any] | None = None
    enable_summary: bool | None = None
    enable_vision_llm: bool | None = None
    periodic_indexing_enabled: bool | None = None
    indexing_frequency_minutes: int | None = None
--- a/surfsense_backend/app/services/llm_service.py
+++ b/surfsense_backend/app/services/llm_service.py
@ -68,7 +68,6 @@ def _is_interactive_auth_provider(
 class LLMRole:
    AGENT = "agent"  # For agent/chat operations
    DOCUMENT_SUMMARY = "document_summary"  # For document summarization
 def get_global_llm_config(llm_config_id: int) -> dict | None:
@ -266,7 +265,7 @@ async def get_search_space_llm_instance(
    Args:
        session: Database session
        search_space_id: Search Space ID
-        role: LLM role ('agent' or 'document_summary')
+        role: LLM role ('agent')
    Returns:
        ChatLiteLLM or ChatLiteLLMRouter instance, or None if not found
@ -283,11 +282,8 @@ async def get_search_space_llm_instance(
            return None
        # Get the appropriate LLM config ID based on role
        llm_config_id = None
        if role == LLMRole.AGENT:
            llm_config_id = search_space.agent_llm_id
        elif role == LLMRole.DOCUMENT_SUMMARY:
            llm_config_id = search_space.document_summary_llm_id
        else:
            logger.error(f"Invalid LLM role: {role}")
            return None
@ -470,20 +466,13 @@ async def get_search_space_llm_instance(
 async def get_agent_llm(
    session: AsyncSession, search_space_id: int
 ) -> ChatLiteLLM | ChatLiteLLMRouter | None:
    """Get the search space's agent LLM instance for chat operations."""
    return await get_search_space_llm_instance(session, search_space_id, LLMRole.AGENT)
 async def get_document_summary_llm(
    session: AsyncSession, search_space_id: int, disable_streaming: bool = False
 ) -> ChatLiteLLM | ChatLiteLLMRouter | None:
-    """Get the search space's document summary LLM instance."""
+    """Get the LLM used for document summarization (now the search space's agent LLM)."""
    return await get_search_space_llm_instance(
        session,
        search_space_id,
-        LLMRole.DOCUMENT_SUMMARY,
+        LLMRole.AGENT,
        disable_streaming=disable_streaming,
    )
@ -645,22 +634,6 @@ async def get_vision_llm(
        return None
 # Backward-compatible alias (LLM preferences are now per-search-space, not per-user)
 async def get_user_long_context_llm(
    session: AsyncSession,
    user_id: str,
    search_space_id: int,
    disable_streaming: bool = False,
 ) -> ChatLiteLLM | ChatLiteLLMRouter | None:
    """
    Deprecated: Use get_document_summary_llm instead.
    The user_id parameter is ignored as LLM preferences are now per-search-space.
    """
    return await get_document_summary_llm(
        session, search_space_id, disable_streaming=disable_streaming
    )
 def get_planner_llm() -> ChatLiteLLM | None:
    """Return a planner LLM instance from the first global config marked
    ``is_planner: true``, or ``None`` if no planner config is defined.
--- a/surfsense_web/contracts/types/connector.types.ts
+++ b/surfsense_web/contracts/types/connector.types.ts
@ -43,7 +43,6 @@ export const searchSourceConnector = z.object({
 	is_active: z.boolean().default(true),
 	last_indexed_at: z.string().nullable(),
 	config: z.record(z.string(), z.any()),
 	enable_summary: z.boolean().default(false),
 	enable_vision_llm: z.boolean().default(false),
 	periodic_indexing_enabled: z.boolean(),
 	indexing_frequency_minutes: z.number().nullable(),
@ -98,7 +97,6 @@ export const createConnectorRequest = z.object({
 		is_active: true,
 		last_indexed_at: true,
 		config: true,
 		enable_summary: true,
 		enable_vision_llm: true,
 		periodic_indexing_enabled: true,
 		indexing_frequency_minutes: true,
@ -124,7 +122,6 @@ export const updateConnectorRequest = z.object({
 			is_active: true,
 			last_indexed_at: true,
 			config: true,
 			enable_summary: true,
 			enable_vision_llm: true,
 			periodic_indexing_enabled: true,
 			indexing_frequency_minutes: true,
--- a/surfsense_web/contracts/types/new-llm-config.types.ts
+++ b/surfsense_web/contracts/types/new-llm-config.types.ts
@ -384,11 +384,9 @@ export const getGlobalVisionLLMConfigsResponse = z.array(globalVisionLLMConfig);
 export const llmPreferences = z.object({
 	agent_llm_id: z.union([z.number(), z.null()]).optional(),
 	document_summary_llm_id: z.union([z.number(), z.null()]).optional(),
 	image_generation_config_id: z.union([z.number(), z.null()]).optional(),
 	vision_llm_config_id: z.union([z.number(), z.null()]).optional(),
 	agent_llm: z.union([z.record(z.string(), z.unknown()), z.null()]).optional(),
 	document_summary_llm: z.union([z.record(z.string(), z.unknown()), z.null()]).optional(),
 	image_generation_config: z.union([z.record(z.string(), z.unknown()), z.null()]).optional(),
 	vision_llm_config: z.union([z.record(z.string(), z.unknown()), z.null()]).optional(),
 });
@ -409,7 +407,6 @@ export const updateLLMPreferencesRequest = z.object({
 	search_space_id: z.number(),
 	data: llmPreferences.pick({
 		agent_llm_id: true,
 		document_summary_llm_id: true,
 		image_generation_config_id: true,
 		vision_llm_config_id: true,
 	}),
--- a/surfsense_web/zero/schema/documents.ts
+++ b/surfsense_web/zero/schema/documents.ts
@ -21,7 +21,6 @@ export const searchSourceConnectorTable = table("search_source_connectors")
 		isIndexable: boolean().from("is_indexable"),
 		lastIndexedAt: number().optional().from("last_indexed_at"),
 		config: json(),
 		enableSummary: boolean().from("enable_summary"),
 		periodicIndexingEnabled: boolean().from("periodic_indexing_enabled"),
 		indexingFrequencyMinutes: number().optional().from("indexing_frequency_minutes"),
 		nextScheduledAt: number().optional().from("next_scheduled_at"),