mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-06-06 20:15:17 +02:00
feat(db): Remove document summary LLM schema
This commit is contained in:
parent
e68b3f9532
commit
290a9539ef
9 changed files with 137 additions and 67 deletions
|
|
@ -0,0 +1,134 @@
|
|||
"""remove document summary llm settings
|
||||
|
||||
Revision ID: 154
|
||||
Revises: 153
|
||||
"""
|
||||
|
||||
from collections.abc import Sequence
|
||||
|
||||
import sqlalchemy as sa
|
||||
|
||||
from alembic import op
|
||||
|
||||
revision: str = "154"
|
||||
down_revision: str | None = "153"
|
||||
branch_labels: str | Sequence[str] | None = None
|
||||
depends_on: str | Sequence[str] | None = None
|
||||
|
||||
PUBLICATION_NAME = "zero_publication"
|
||||
|
||||
DOCUMENT_COLS = [
|
||||
"id",
|
||||
"title",
|
||||
"document_type",
|
||||
"search_space_id",
|
||||
"folder_id",
|
||||
"created_by_id",
|
||||
"status",
|
||||
"created_at",
|
||||
"updated_at",
|
||||
]
|
||||
|
||||
USER_COLS = [
|
||||
"id",
|
||||
"pages_limit",
|
||||
"pages_used",
|
||||
"premium_credit_micros_limit",
|
||||
"premium_credit_micros_used",
|
||||
]
|
||||
|
||||
AUTOMATION_RUN_COLS = [
|
||||
"id",
|
||||
"automation_id",
|
||||
"trigger_id",
|
||||
"status",
|
||||
"step_results",
|
||||
"started_at",
|
||||
"finished_at",
|
||||
"created_at",
|
||||
]
|
||||
|
||||
|
||||
def _column_exists(conn, table: str, column: str) -> bool:
    """Return whether ``table`` currently has a column named ``column``.

    The lookup is restricted to schemas on the connection's search path.
    ``information_schema.columns`` lists every schema the role can see, so
    without that filter a same-named table in an unrelated schema would
    yield a false positive.

    Args:
        conn: Connection used to run the catalog query.
        table: Table name, compared verbatim against ``table_name``.
        column: Column name, compared verbatim against ``column_name``.

    Returns:
        ``True`` when at least one matching row exists, ``False`` otherwise.
    """
    lookup = sa.text(
        "SELECT 1 FROM information_schema.columns "
        # Only consider schemas that unqualified table names can resolve to.
        "WHERE table_schema = ANY (current_schemas(false)) "
        "AND table_name = :table AND column_name = :column "
        "LIMIT 1"
    )
    row = conn.execute(lookup, {"table": table, "column": column}).fetchone()
    return row is not None
|
||||
|
||||
|
||||
def _has_zero_version(conn, table: str) -> bool:
    """Report whether ``table`` carries a ``_0_version`` column.

    Thin wrapper around ``_column_exists`` with the column name fixed.
    """
    zero_version_column = "_0_version"
    return _column_exists(conn, table, zero_version_column)
|
||||
|
||||
|
||||
def _set_table_ddl(conn) -> str:
    """Build the ``ALTER PUBLICATION ... SET TABLE`` statement text.

    ``documents``, ``"user"`` and ``automation_runs`` are emitted with explicit
    column lists; the remaining tables are listed by name only. A quoted
    ``"_0_version"`` entry is appended to the ``documents`` / ``"user"`` column
    lists when that column is present on the respective table.

    Args:
        conn: Connection used to probe for the ``_0_version`` columns.

    Returns:
        The complete DDL statement as a string.
    """

    def _joined_columns(base_cols, table_name):
        # Copy so the module-level constant is never mutated.
        cols = list(base_cols)
        if _has_zero_version(conn, table_name):
            cols.append('"_0_version"')
        return ", ".join(cols)

    # Probe order matches the statement order: documents first, then user.
    documents_cols = _joined_columns(DOCUMENT_COLS, "documents")
    user_cols = _joined_columns(USER_COLS, "user")
    automation_cols = ", ".join(AUTOMATION_RUN_COLS)

    published = (
        "notifications",
        f"documents ({documents_cols})",
        "folders",
        "search_source_connectors",
        "new_chat_messages",
        "chat_comments",
        "chat_session_state",
        # ``user`` must stay double-quoted in the emitted SQL.
        f'"user" ({user_cols})',
        f"automation_runs ({automation_cols})",
    )
    table_clause = ", ".join(published)
    return f"ALTER PUBLICATION {PUBLICATION_NAME} SET TABLE " + table_clause
|
||||
|
||||
|
||||
def _resync_zero_publication(tag: str) -> None:
    """Re-issue the publication's table list, bracketed by comment updates.

    Returns immediately when ``pg_publication`` has no row named
    ``PUBLICATION_NAME``. Otherwise three statements run inside one
    transaction scope: set the publication comment to ``pre-<tag>``, apply the
    statement from ``_set_table_ddl``, then set the comment to ``post-<tag>``.

    Args:
        tag: Label interpolated into the publication comment text.
    """
    bind = op.get_bind()

    publication_row = bind.execute(
        sa.text("SELECT 1 FROM pg_publication WHERE pubname = :name"),
        {"name": PUBLICATION_NAME},
    ).fetchone()
    if not publication_row:
        return

    def _comment_stmt(stage: str):
        # NOTE(review): presumably the comment change is what signals
        # downstream consumers that the publication changed -- confirm.
        return sa.text(
            f"COMMENT ON PUBLICATION {PUBLICATION_NAME} IS '{stage}-{tag}'"
        )

    # Nest inside the caller's transaction when one is open; otherwise start one.
    if bind.in_transaction():
        scope = bind.begin_nested()
    else:
        scope = bind.begin()

    with scope:
        bind.execute(_comment_stmt("pre"))
        # The DDL text is built only after the "pre" comment has been applied.
        bind.execute(sa.text(_set_table_ddl(bind)))
        bind.execute(_comment_stmt("post"))
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Drop the document-summary columns, then resync the publication.

    Each column is dropped only if it is still present, so the migration can
    be re-run against a database where the columns are already gone.
    """
    bind = op.get_bind()

    obsolete_columns = (
        ("searchspaces", "document_summary_llm_id"),
        ("search_source_connectors", "enable_summary"),
    )
    for table_name, column_name in obsolete_columns:
        if _column_exists(bind, table_name, column_name):
            op.drop_column(table_name, column_name)

    _resync_zero_publication("154-summary-removal")
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Re-create the columns removed by ``upgrade`` and resync the publication.

    Each column is added only if it is currently missing.
    """
    bind = op.get_bind()

    summary_llm_missing = not _column_exists(
        bind, "searchspaces", "document_summary_llm_id"
    )
    if summary_llm_missing:
        # NOTE(review): re-created with a server-side default of 0 -- confirm
        # this matches the column definition that existed before revision 154.
        summary_llm_column = sa.Column(
            "document_summary_llm_id",
            sa.Integer(),
            nullable=True,
            server_default="0",
        )
        op.add_column("searchspaces", summary_llm_column)

    enable_summary_missing = not _column_exists(
        bind, "search_source_connectors", "enable_summary"
    )
    if enable_summary_missing:
        # NOT NULL, so the server default is what populates existing rows.
        enable_summary_column = sa.Column(
            "enable_summary",
            sa.Boolean(),
            nullable=False,
            server_default=sa.text("false"),
        )
        op.add_column("search_source_connectors", enable_summary_column)

    _resync_zero_publication("154-summary-removal-downgrade")
|
||||
|
|
@ -1781,9 +1781,6 @@ class SearchSpace(BaseModel, TimestampMixin):
|
|||
agent_llm_id = Column(
|
||||
Integer, nullable=True, default=0
|
||||
) # For agent/chat operations, defaults to Auto mode
|
||||
document_summary_llm_id = Column(
|
||||
Integer, nullable=True, default=0
|
||||
) # For document summarization, defaults to Auto mode
|
||||
image_generation_config_id = Column(
|
||||
Integer, nullable=True, default=0
|
||||
) # For image generation, defaults to Auto mode
|
||||
|
|
@ -1951,12 +1948,6 @@ class SearchSourceConnector(BaseModel, TimestampMixin):
|
|||
last_indexed_at = Column(TIMESTAMP(timezone=True), nullable=True)
|
||||
config = Column(JSON, nullable=False)
|
||||
|
||||
# Summary generation (LLM-based) - disabled by default to save resources.
|
||||
# When enabled, improves hybrid search quality at the cost of LLM calls.
|
||||
enable_summary = Column(
|
||||
Boolean, nullable=False, default=False, server_default="false"
|
||||
)
|
||||
|
||||
# Vision LLM for image files - disabled by default to save cost/time.
|
||||
# When enabled, images are described via a vision language model instead
|
||||
# of falling back to the document parser.
|
||||
|
|
|
|||
|
|
@ -617,9 +617,6 @@ async def get_llm_preferences(
|
|||
|
||||
# Get full config objects for each role
|
||||
agent_llm = await _get_llm_config_by_id(session, search_space.agent_llm_id)
|
||||
document_summary_llm = await _get_llm_config_by_id(
|
||||
session, search_space.document_summary_llm_id
|
||||
)
|
||||
image_generation_config = await _get_image_gen_config_by_id(
|
||||
session, search_space.image_generation_config_id
|
||||
)
|
||||
|
|
@ -629,11 +626,9 @@ async def get_llm_preferences(
|
|||
|
||||
return LLMPreferencesRead(
|
||||
agent_llm_id=search_space.agent_llm_id,
|
||||
document_summary_llm_id=search_space.document_summary_llm_id,
|
||||
image_generation_config_id=search_space.image_generation_config_id,
|
||||
vision_llm_config_id=search_space.vision_llm_config_id,
|
||||
agent_llm=agent_llm,
|
||||
document_summary_llm=document_summary_llm,
|
||||
image_generation_config=image_generation_config,
|
||||
vision_llm_config=vision_llm_config,
|
||||
)
|
||||
|
|
@ -707,9 +702,6 @@ async def update_llm_preferences(
|
|||
|
||||
# Get full config objects for response
|
||||
agent_llm = await _get_llm_config_by_id(session, search_space.agent_llm_id)
|
||||
document_summary_llm = await _get_llm_config_by_id(
|
||||
session, search_space.document_summary_llm_id
|
||||
)
|
||||
image_generation_config = await _get_image_gen_config_by_id(
|
||||
session, search_space.image_generation_config_id
|
||||
)
|
||||
|
|
@ -719,11 +711,9 @@ async def update_llm_preferences(
|
|||
|
||||
return LLMPreferencesRead(
|
||||
agent_llm_id=search_space.agent_llm_id,
|
||||
document_summary_llm_id=search_space.document_summary_llm_id,
|
||||
image_generation_config_id=search_space.image_generation_config_id,
|
||||
vision_llm_config_id=search_space.vision_llm_config_id,
|
||||
agent_llm=agent_llm,
|
||||
document_summary_llm=document_summary_llm,
|
||||
image_generation_config=image_generation_config,
|
||||
vision_llm_config=vision_llm_config,
|
||||
)
|
||||
|
|
|
|||
|
|
@ -221,9 +221,6 @@ class LLMPreferencesRead(BaseModel):
|
|||
agent_llm_id: int | None = Field(
|
||||
None, description="ID of the LLM config to use for agent/chat tasks"
|
||||
)
|
||||
document_summary_llm_id: int | None = Field(
|
||||
None, description="ID of the LLM config to use for document summarization"
|
||||
)
|
||||
image_generation_config_id: int | None = Field(
|
||||
None, description="ID of the image generation config to use"
|
||||
)
|
||||
|
|
@ -234,9 +231,6 @@ class LLMPreferencesRead(BaseModel):
|
|||
agent_llm: dict[str, Any] | None = Field(
|
||||
None, description="Full config for agent LLM"
|
||||
)
|
||||
document_summary_llm: dict[str, Any] | None = Field(
|
||||
None, description="Full config for document summary LLM"
|
||||
)
|
||||
image_generation_config: dict[str, Any] | None = Field(
|
||||
None, description="Full config for image generation"
|
||||
)
|
||||
|
|
@ -253,9 +247,6 @@ class LLMPreferencesUpdate(BaseModel):
|
|||
agent_llm_id: int | None = Field(
|
||||
None, description="ID of the LLM config to use for agent/chat tasks"
|
||||
)
|
||||
document_summary_llm_id: int | None = Field(
|
||||
None, description="ID of the LLM config to use for document summarization"
|
||||
)
|
||||
image_generation_config_id: int | None = Field(
|
||||
None, description="ID of the image generation config to use"
|
||||
)
|
||||
|
|
|
|||
|
|
@ -16,7 +16,6 @@ class SearchSourceConnectorBase(BaseModel):
|
|||
is_indexable: bool
|
||||
last_indexed_at: datetime | None = None
|
||||
config: dict[str, Any]
|
||||
enable_summary: bool = False
|
||||
enable_vision_llm: bool = False
|
||||
periodic_indexing_enabled: bool = False
|
||||
indexing_frequency_minutes: int | None = None
|
||||
|
|
@ -67,7 +66,6 @@ class SearchSourceConnectorUpdate(BaseModel):
|
|||
is_indexable: bool | None = None
|
||||
last_indexed_at: datetime | None = None
|
||||
config: dict[str, Any] | None = None
|
||||
enable_summary: bool | None = None
|
||||
enable_vision_llm: bool | None = None
|
||||
periodic_indexing_enabled: bool | None = None
|
||||
indexing_frequency_minutes: int | None = None
|
||||
|
|
|
|||
|
|
@ -68,7 +68,6 @@ def _is_interactive_auth_provider(
|
|||
|
||||
class LLMRole:
|
||||
AGENT = "agent" # For agent/chat operations
|
||||
DOCUMENT_SUMMARY = "document_summary" # For document summarization
|
||||
|
||||
|
||||
def get_global_llm_config(llm_config_id: int) -> dict | None:
|
||||
|
|
@ -266,7 +265,7 @@ async def get_search_space_llm_instance(
|
|||
Args:
|
||||
session: Database session
|
||||
search_space_id: Search Space ID
|
||||
role: LLM role ('agent' or 'document_summary')
|
||||
role: LLM role ('agent')
|
||||
|
||||
Returns:
|
||||
ChatLiteLLM or ChatLiteLLMRouter instance, or None if not found
|
||||
|
|
@ -283,11 +282,8 @@ async def get_search_space_llm_instance(
|
|||
return None
|
||||
|
||||
# Get the appropriate LLM config ID based on role
|
||||
llm_config_id = None
|
||||
if role == LLMRole.AGENT:
|
||||
llm_config_id = search_space.agent_llm_id
|
||||
elif role == LLMRole.DOCUMENT_SUMMARY:
|
||||
llm_config_id = search_space.document_summary_llm_id
|
||||
else:
|
||||
logger.error(f"Invalid LLM role: {role}")
|
||||
return None
|
||||
|
|
@ -470,20 +466,13 @@ async def get_search_space_llm_instance(
|
|||
|
||||
|
||||
async def get_agent_llm(
|
||||
session: AsyncSession, search_space_id: int
|
||||
) -> ChatLiteLLM | ChatLiteLLMRouter | None:
|
||||
"""Get the search space's agent LLM instance for chat operations."""
|
||||
return await get_search_space_llm_instance(session, search_space_id, LLMRole.AGENT)
|
||||
|
||||
|
||||
async def get_document_summary_llm(
|
||||
session: AsyncSession, search_space_id: int, disable_streaming: bool = False
|
||||
) -> ChatLiteLLM | ChatLiteLLMRouter | None:
|
||||
"""Get the search space's document summary LLM instance."""
|
||||
"""Get the search space's agent LLM instance for chat operations."""
|
||||
return await get_search_space_llm_instance(
|
||||
session,
|
||||
search_space_id,
|
||||
LLMRole.DOCUMENT_SUMMARY,
|
||||
LLMRole.AGENT,
|
||||
disable_streaming=disable_streaming,
|
||||
)
|
||||
|
||||
|
|
@ -645,22 +634,6 @@ async def get_vision_llm(
|
|||
return None
|
||||
|
||||
|
||||
# Backward-compatible alias (LLM preferences are now per-search-space, not per-user)
|
||||
async def get_user_long_context_llm(
|
||||
session: AsyncSession,
|
||||
user_id: str,
|
||||
search_space_id: int,
|
||||
disable_streaming: bool = False,
|
||||
) -> ChatLiteLLM | ChatLiteLLMRouter | None:
|
||||
"""
|
||||
Deprecated: Use get_document_summary_llm instead.
|
||||
The user_id parameter is ignored as LLM preferences are now per-search-space.
|
||||
"""
|
||||
return await get_document_summary_llm(
|
||||
session, search_space_id, disable_streaming=disable_streaming
|
||||
)
|
||||
|
||||
|
||||
def get_planner_llm() -> ChatLiteLLM | None:
|
||||
"""Return a planner LLM instance from the first global config marked
|
||||
``is_planner: true``, or ``None`` if no planner config is defined.
|
||||
|
|
|
|||
|
|
@ -43,7 +43,6 @@ export const searchSourceConnector = z.object({
|
|||
is_active: z.boolean().default(true),
|
||||
last_indexed_at: z.string().nullable(),
|
||||
config: z.record(z.string(), z.any()),
|
||||
enable_summary: z.boolean().default(false),
|
||||
enable_vision_llm: z.boolean().default(false),
|
||||
periodic_indexing_enabled: z.boolean(),
|
||||
indexing_frequency_minutes: z.number().nullable(),
|
||||
|
|
@ -98,7 +97,6 @@ export const createConnectorRequest = z.object({
|
|||
is_active: true,
|
||||
last_indexed_at: true,
|
||||
config: true,
|
||||
enable_summary: true,
|
||||
enable_vision_llm: true,
|
||||
periodic_indexing_enabled: true,
|
||||
indexing_frequency_minutes: true,
|
||||
|
|
@ -124,7 +122,6 @@ export const updateConnectorRequest = z.object({
|
|||
is_active: true,
|
||||
last_indexed_at: true,
|
||||
config: true,
|
||||
enable_summary: true,
|
||||
enable_vision_llm: true,
|
||||
periodic_indexing_enabled: true,
|
||||
indexing_frequency_minutes: true,
|
||||
|
|
|
|||
|
|
@ -384,11 +384,9 @@ export const getGlobalVisionLLMConfigsResponse = z.array(globalVisionLLMConfig);
|
|||
|
||||
export const llmPreferences = z.object({
|
||||
agent_llm_id: z.union([z.number(), z.null()]).optional(),
|
||||
document_summary_llm_id: z.union([z.number(), z.null()]).optional(),
|
||||
image_generation_config_id: z.union([z.number(), z.null()]).optional(),
|
||||
vision_llm_config_id: z.union([z.number(), z.null()]).optional(),
|
||||
agent_llm: z.union([z.record(z.string(), z.unknown()), z.null()]).optional(),
|
||||
document_summary_llm: z.union([z.record(z.string(), z.unknown()), z.null()]).optional(),
|
||||
image_generation_config: z.union([z.record(z.string(), z.unknown()), z.null()]).optional(),
|
||||
vision_llm_config: z.union([z.record(z.string(), z.unknown()), z.null()]).optional(),
|
||||
});
|
||||
|
|
@ -409,7 +407,6 @@ export const updateLLMPreferencesRequest = z.object({
|
|||
search_space_id: z.number(),
|
||||
data: llmPreferences.pick({
|
||||
agent_llm_id: true,
|
||||
document_summary_llm_id: true,
|
||||
image_generation_config_id: true,
|
||||
vision_llm_config_id: true,
|
||||
}),
|
||||
|
|
|
|||
|
|
@ -21,7 +21,6 @@ export const searchSourceConnectorTable = table("search_source_connectors")
|
|||
isIndexable: boolean().from("is_indexable"),
|
||||
lastIndexedAt: number().optional().from("last_indexed_at"),
|
||||
config: json(),
|
||||
enableSummary: boolean().from("enable_summary"),
|
||||
periodicIndexingEnabled: boolean().from("periodic_indexing_enabled"),
|
||||
indexingFrequencyMinutes: number().optional().from("indexing_frequency_minutes"),
|
||||
nextScheduledAt: number().optional().from("next_scheduled_at"),
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue