mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-04-25 00:36:31 +02:00
add vision LLM role for screenshot analysis
This commit is contained in:
parent
8ba571566d
commit
482238e5d4
7 changed files with 91 additions and 2 deletions
|
|
@ -0,0 +1,39 @@
|
|||
"""117_add_vision_llm_id_to_search_spaces
|
||||
|
||||
Revision ID: 117
|
||||
Revises: 116
|
||||
|
||||
Adds vision_llm_id column to search_spaces for vision/screenshot analysis
|
||||
LLM role assignment. Defaults to 0 (Auto mode), same convention as
|
||||
agent_llm_id and document_summary_llm_id.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Sequence
|
||||
|
||||
import sqlalchemy as sa
|
||||
|
||||
from alembic import op
|
||||
|
||||
revision: str = "117"
|
||||
down_revision: str | None = "116"
|
||||
branch_labels: str | Sequence[str] | None = None
|
||||
depends_on: str | Sequence[str] | None = None
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Add ``search_spaces.vision_llm_id`` unless the column already exists.

    The column is a nullable integer with a server-side default of ``"0"``.
    The live schema is inspected first, so running this migration against a
    database that already has the column is a no-op.
    """
    inspector = sa.inspect(op.get_bind())
    present = [column["name"] for column in inspector.get_columns("search_spaces")]

    # Nothing to do when the column is already in place.
    if "vision_llm_id" in present:
        return

    vision_llm_id = sa.Column(
        "vision_llm_id", sa.Integer(), nullable=True, server_default="0"
    )
    op.add_column("search_spaces", vision_llm_id)
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Drop ``search_spaces.vision_llm_id`` if the column is present.

    ``upgrade()`` checks the live schema before adding the column; this does
    the same before dropping it, so a downgrade on a database where the
    column is already missing is a no-op instead of an error.
    """
    inspector = sa.inspect(op.get_bind())
    present = {column["name"] for column in inspector.get_columns("search_spaces")}

    if "vision_llm_id" in present:
        op.drop_column("search_spaces", "vision_llm_id")
|
||||
|
|
@ -1329,6 +1329,9 @@ class SearchSpace(BaseModel, TimestampMixin):
|
|||
image_generation_config_id = Column(
|
||||
Integer, nullable=True, default=0
|
||||
) # For image generation, defaults to Auto mode
|
||||
vision_llm_id = Column(
|
||||
Integer, nullable=True, default=0
|
||||
) # For vision/screenshot analysis, defaults to Auto mode
|
||||
|
||||
user_id = Column(
|
||||
UUID(as_uuid=True), ForeignKey("user.id", ondelete="CASCADE"), nullable=False
|
||||
|
|
|
|||
|
|
@ -522,14 +522,17 @@ async def get_llm_preferences(
|
|||
image_generation_config = await _get_image_gen_config_by_id(
|
||||
session, search_space.image_generation_config_id
|
||||
)
|
||||
vision_llm = await _get_llm_config_by_id(session, search_space.vision_llm_id)
|
||||
|
||||
return LLMPreferencesRead(
|
||||
agent_llm_id=search_space.agent_llm_id,
|
||||
document_summary_llm_id=search_space.document_summary_llm_id,
|
||||
image_generation_config_id=search_space.image_generation_config_id,
|
||||
vision_llm_id=search_space.vision_llm_id,
|
||||
agent_llm=agent_llm,
|
||||
document_summary_llm=document_summary_llm,
|
||||
image_generation_config=image_generation_config,
|
||||
vision_llm=vision_llm,
|
||||
)
|
||||
|
||||
except HTTPException:
|
||||
|
|
@ -589,14 +592,17 @@ async def update_llm_preferences(
|
|||
image_generation_config = await _get_image_gen_config_by_id(
|
||||
session, search_space.image_generation_config_id
|
||||
)
|
||||
vision_llm = await _get_llm_config_by_id(session, search_space.vision_llm_id)
|
||||
|
||||
return LLMPreferencesRead(
|
||||
agent_llm_id=search_space.agent_llm_id,
|
||||
document_summary_llm_id=search_space.document_summary_llm_id,
|
||||
image_generation_config_id=search_space.image_generation_config_id,
|
||||
vision_llm_id=search_space.vision_llm_id,
|
||||
agent_llm=agent_llm,
|
||||
document_summary_llm=document_summary_llm,
|
||||
image_generation_config=image_generation_config,
|
||||
vision_llm=vision_llm,
|
||||
)
|
||||
|
||||
except HTTPException:
|
||||
|
|
|
|||
|
|
@ -182,6 +182,9 @@ class LLMPreferencesRead(BaseModel):
|
|||
image_generation_config_id: int | None = Field(
|
||||
None, description="ID of the image generation config to use"
|
||||
)
|
||||
vision_llm_id: int | None = Field(
|
||||
None, description="ID of the LLM config to use for vision/screenshot analysis"
|
||||
)
|
||||
agent_llm: dict[str, Any] | None = Field(
|
||||
None, description="Full config for agent LLM"
|
||||
)
|
||||
|
|
@ -191,6 +194,9 @@ class LLMPreferencesRead(BaseModel):
|
|||
image_generation_config: dict[str, Any] | None = Field(
|
||||
None, description="Full config for image generation"
|
||||
)
|
||||
vision_llm: dict[str, Any] | None = Field(
|
||||
None, description="Full config for vision LLM"
|
||||
)
|
||||
|
||||
model_config = ConfigDict(from_attributes=True)
|
||||
|
||||
|
|
@ -207,3 +213,6 @@ class LLMPreferencesUpdate(BaseModel):
|
|||
image_generation_config_id: int | None = Field(
|
||||
None, description="ID of the image generation config to use"
|
||||
)
|
||||
vision_llm_id: int | None = Field(
|
||||
None, description="ID of the LLM config to use for vision/screenshot analysis"
|
||||
)
|
||||
|
|
|
|||
|
|
@ -32,6 +32,7 @@ logger = logging.getLogger(__name__)
|
|||
class LLMRole:
    """String identifiers for the LLM roles a search space can assign."""

    AGENT = "agent"  # For agent/chat operations
    DOCUMENT_SUMMARY = "document_summary"  # For document summarization
    VISION = "vision"  # For vision/screenshot analysis
|
||||
|
||||
|
||||
def get_global_llm_config(llm_config_id: int) -> dict | None:
|
||||
|
|
@ -187,7 +188,7 @@ async def get_search_space_llm_instance(
|
|||
Args:
|
||||
session: Database session
|
||||
search_space_id: Search Space ID
|
||||
role: LLM role ('agent' or 'document_summary')
|
||||
role: LLM role ('agent', 'document_summary', or 'vision')
|
||||
|
||||
Returns:
|
||||
ChatLiteLLM or ChatLiteLLMRouter instance, or None if not found
|
||||
|
|
@ -209,6 +210,8 @@ async def get_search_space_llm_instance(
|
|||
llm_config_id = search_space.agent_llm_id
|
||||
elif role == LLMRole.DOCUMENT_SUMMARY:
|
||||
llm_config_id = search_space.document_summary_llm_id
|
||||
elif role == LLMRole.VISION:
|
||||
llm_config_id = search_space.vision_llm_id
|
||||
else:
|
||||
logger.error(f"Invalid LLM role: {role}")
|
||||
return None
|
||||
|
|
@ -405,6 +408,13 @@ async def get_document_summary_llm(
|
|||
)
|
||||
|
||||
|
||||
async def get_vision_llm(
    session: AsyncSession, search_space_id: int
) -> ChatLiteLLM | ChatLiteLLMRouter | None:
    """Return the LLM instance assigned to a search space's vision role.

    Thin wrapper that delegates to ``get_search_space_llm_instance`` with
    ``LLMRole.VISION``; the vision role is used for screenshot analysis.

    Args:
        session: Database session
        search_space_id: Search Space ID

    Returns:
        Whatever ``get_search_space_llm_instance`` returns for the vision
        role, which may be None.
    """
    vision_llm = await get_search_space_llm_instance(
        session, search_space_id, LLMRole.VISION
    )
    return vision_llm
|
||||
|
||||
|
||||
# Backward-compatible alias (LLM preferences are now per-search-space, not per-user)
|
||||
async def get_user_long_context_llm(
|
||||
session: AsyncSession,
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ import {
|
|||
Bot,
|
||||
CheckCircle,
|
||||
CircleDashed,
|
||||
Eye,
|
||||
FileText,
|
||||
ImageIcon,
|
||||
RefreshCw,
|
||||
|
|
@ -71,6 +72,15 @@ const ROLE_DESCRIPTIONS = {
|
|||
prefKey: "image_generation_config_id" as const,
|
||||
configType: "image" as const,
|
||||
},
|
||||
vision: {
|
||||
icon: Eye,
|
||||
title: "Vision LLM",
|
||||
description: "Vision-capable model for screenshot analysis and context extraction",
|
||||
color: "text-amber-600 dark:text-amber-400",
|
||||
bgColor: "bg-amber-500/10",
|
||||
prefKey: "vision_llm_id" as const,
|
||||
configType: "llm" as const,
|
||||
},
|
||||
};
|
||||
|
||||
interface LLMRoleManagerProps {
|
||||
|
|
@ -116,6 +126,7 @@ export function LLMRoleManager({ searchSpaceId }: LLMRoleManagerProps) {
|
|||
agent_llm_id: preferences.agent_llm_id ?? "",
|
||||
document_summary_llm_id: preferences.document_summary_llm_id ?? "",
|
||||
image_generation_config_id: preferences.image_generation_config_id ?? "",
|
||||
vision_llm_id: preferences.vision_llm_id ?? "",
|
||||
}));
|
||||
|
||||
const [hasChanges, setHasChanges] = useState(false);
|
||||
|
|
@ -126,6 +137,7 @@ export function LLMRoleManager({ searchSpaceId }: LLMRoleManagerProps) {
|
|||
agent_llm_id: preferences.agent_llm_id ?? "",
|
||||
document_summary_llm_id: preferences.document_summary_llm_id ?? "",
|
||||
image_generation_config_id: preferences.image_generation_config_id ?? "",
|
||||
vision_llm_id: preferences.vision_llm_id ?? "",
|
||||
};
|
||||
setAssignments(newAssignments);
|
||||
setHasChanges(false);
|
||||
|
|
@ -133,6 +145,7 @@ export function LLMRoleManager({ searchSpaceId }: LLMRoleManagerProps) {
|
|||
preferences?.agent_llm_id,
|
||||
preferences?.document_summary_llm_id,
|
||||
preferences?.image_generation_config_id,
|
||||
preferences?.vision_llm_id,
|
||||
]);
|
||||
|
||||
const handleRoleAssignment = (prefKey: string, configId: string) => {
|
||||
|
|
@ -147,6 +160,7 @@ export function LLMRoleManager({ searchSpaceId }: LLMRoleManagerProps) {
|
|||
agent_llm_id: preferences.agent_llm_id ?? "",
|
||||
document_summary_llm_id: preferences.document_summary_llm_id ?? "",
|
||||
image_generation_config_id: preferences.image_generation_config_id ?? "",
|
||||
vision_llm_id: preferences.vision_llm_id ?? "",
|
||||
};
|
||||
|
||||
const hasChangesNow = Object.keys(newAssignments).some(
|
||||
|
|
@ -168,6 +182,7 @@ export function LLMRoleManager({ searchSpaceId }: LLMRoleManagerProps) {
|
|||
agent_llm_id: toNumericOrUndefined(assignments.agent_llm_id),
|
||||
document_summary_llm_id: toNumericOrUndefined(assignments.document_summary_llm_id),
|
||||
image_generation_config_id: toNumericOrUndefined(assignments.image_generation_config_id),
|
||||
vision_llm_id: toNumericOrUndefined(assignments.vision_llm_id),
|
||||
};
|
||||
|
||||
await updatePreferences({
|
||||
|
|
@ -186,6 +201,7 @@ export function LLMRoleManager({ searchSpaceId }: LLMRoleManagerProps) {
|
|||
agent_llm_id: preferences.agent_llm_id ?? "",
|
||||
document_summary_llm_id: preferences.document_summary_llm_id ?? "",
|
||||
image_generation_config_id: preferences.image_generation_config_id ?? "",
|
||||
vision_llm_id: preferences.vision_llm_id ?? "",
|
||||
});
|
||||
setHasChanges(false);
|
||||
};
|
||||
|
|
@ -199,7 +215,10 @@ export function LLMRoleManager({ searchSpaceId }: LLMRoleManagerProps) {
|
|||
assignments.document_summary_llm_id !== undefined &&
|
||||
assignments.image_generation_config_id !== "" &&
|
||||
assignments.image_generation_config_id !== null &&
|
||||
assignments.image_generation_config_id !== undefined;
|
||||
assignments.image_generation_config_id !== undefined &&
|
||||
assignments.vision_llm_id !== "" &&
|
||||
assignments.vision_llm_id !== null &&
|
||||
assignments.vision_llm_id !== undefined;
|
||||
|
||||
// Combine global and custom LLM configs
|
||||
const allLLMConfigs = [
|
||||
|
|
|
|||
|
|
@ -264,9 +264,11 @@ export const llmPreferences = z.object({
|
|||
agent_llm_id: z.union([z.number(), z.null()]).optional(),
|
||||
document_summary_llm_id: z.union([z.number(), z.null()]).optional(),
|
||||
image_generation_config_id: z.union([z.number(), z.null()]).optional(),
|
||||
vision_llm_id: z.union([z.number(), z.null()]).optional(),
|
||||
agent_llm: z.union([z.record(z.string(), z.unknown()), z.null()]).optional(),
|
||||
document_summary_llm: z.union([z.record(z.string(), z.unknown()), z.null()]).optional(),
|
||||
image_generation_config: z.union([z.record(z.string(), z.unknown()), z.null()]).optional(),
|
||||
vision_llm: z.union([z.record(z.string(), z.unknown()), z.null()]).optional(),
|
||||
});
|
||||
|
||||
/**
|
||||
|
|
@ -287,6 +289,7 @@ export const updateLLMPreferencesRequest = z.object({
|
|||
agent_llm_id: true,
|
||||
document_summary_llm_id: true,
|
||||
image_generation_config_id: true,
|
||||
vision_llm_id: true,
|
||||
}),
|
||||
});
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue