diff --git a/surfsense_backend/app/routes/autocomplete_routes.py b/surfsense_backend/app/routes/autocomplete_routes.py index a6f66f408..bb56709cb 100644 --- a/surfsense_backend/app/routes/autocomplete_routes.py +++ b/surfsense_backend/app/routes/autocomplete_routes.py @@ -1,18 +1,21 @@ -from fastapi import APIRouter, Depends +from fastapi import APIRouter, Depends, HTTPException from fastapi.responses import StreamingResponse -from pydantic import BaseModel +from pydantic import BaseModel, Field from sqlalchemy.ext.asyncio import AsyncSession from app.db import User, get_async_session from app.services.new_streaming_service import VercelStreamingService from app.services.vision_autocomplete_service import stream_vision_autocomplete from app.users import current_active_user +from app.utils.rbac import check_search_space_access router = APIRouter(prefix="/autocomplete", tags=["autocomplete"]) +MAX_SCREENSHOT_SIZE = 20 * 1024 * 1024 # 20 MB base64 ceiling + class VisionAutocompleteRequest(BaseModel): - screenshot: str + screenshot: str = Field(..., max_length=MAX_SCREENSHOT_SIZE) search_space_id: int app_name: str = "" window_title: str = "" @@ -24,6 +27,8 @@ async def vision_autocomplete_stream( user: User = Depends(current_active_user), session: AsyncSession = Depends(get_async_session), ): + await check_search_space_access(session, user, body.search_space_id) + return StreamingResponse( stream_vision_autocomplete( body.screenshot, body.search_space_id, session, diff --git a/surfsense_backend/app/services/vision_autocomplete_service.py b/surfsense_backend/app/services/vision_autocomplete_service.py index e172c6522..2ddb56f51 100644 --- a/surfsense_backend/app/services/vision_autocomplete_service.py +++ b/surfsense_backend/app/services/vision_autocomplete_service.py @@ -61,11 +61,21 @@ def _build_system_prompt(app_name: str, window_title: str, kb_context: str) -> s return prompt +def _is_vision_unsupported_error(e: Exception) -> bool: + """Check if an exception indicates the model doesn't support vision/images.""" + msg = str(e).lower() + return "content must be a string" in msg or "does not support image" in msg + + async def _extract_query_from_screenshot( llm, screenshot_data_url: str, app_name: str = "", window_title: str = "", ) -> str | None: - """Ask the Vision LLM to describe what the user is working on.""" + """Ask the Vision LLM to describe what the user is working on. + + Raises vision-unsupported errors so the caller can return a + friendly message immediately instead of retrying with astream. + """ if app_name: prompt_text = EXTRACT_QUERY_PROMPT_WITH_APP.format( app_name=app_name, window_title=window_title, @@ -83,6 +93,8 @@ async def _extract_query_from_screenshot( query = response.content.strip() if hasattr(response, "content") else "" return query if query else None except Exception as e: + if _is_vision_unsupported_error(e): + raise logger.warning(f"Failed to extract query from screenshot: {e}") return None @@ -140,6 +152,10 @@ async def stream_vision_autocomplete( 3. Stream the final completion with screenshot + KB + app context """ streaming = VercelStreamingService() + vision_error_msg = ( + "The selected model does not support vision. " + "Please set a vision-capable model (e.g. GPT-4o, Gemini) in your search space settings." + ) llm = await get_vision_llm(session, search_space_id) if not llm: @@ -149,9 +165,17 @@ async def stream_vision_autocomplete( return kb_context = "" - query = await _extract_query_from_screenshot( - llm, screenshot_data_url, app_name=app_name, window_title=window_title, - ) + try: + query = await _extract_query_from_screenshot( + llm, screenshot_data_url, app_name=app_name, window_title=window_title, + ) + except Exception as e: + logger.warning(f"Vision autocomplete: selected model does not support vision: {e}") + yield streaming.format_message_start() + yield streaming.format_error(vision_error_msg) + yield streaming.format_done() + return + if query: kb_context = await _search_knowledge_base(session, search_space_id, query) @@ -171,10 +195,13 @@ async def stream_vision_autocomplete( ]), ] + text_started = False + text_id = "" try: yield streaming.format_message_start() text_id = streaming.generate_text_id() yield streaming.format_text_start(text_id) + text_started = True async for chunk in llm.astream(messages): token = chunk.content if hasattr(chunk, "content") else str(chunk) @@ -186,13 +213,12 @@ async def stream_vision_autocomplete( yield streaming.format_done() except Exception as e: - error_str = str(e).lower() - if "content must be a string" in error_str or "does not support image" in error_str: + if text_started: + yield streaming.format_text_end(text_id) + + if _is_vision_unsupported_error(e): logger.warning(f"Vision autocomplete: selected model does not support vision: {e}") - yield streaming.format_error( - "The selected model does not support vision. " - "Please set a vision-capable model (e.g. GPT-4o, Gemini) in your search space settings." - ) + yield streaming.format_error(vision_error_msg) else: logger.error(f"Vision autocomplete streaming error: {e}") yield streaming.format_error(str(e)) diff --git a/surfsense_web/app/desktop/suggestion/page.tsx b/surfsense_web/app/desktop/suggestion/page.tsx index 7188b73c6..03944867f 100644 --- a/surfsense_web/app/desktop/suggestion/page.tsx +++ b/surfsense_web/app/desktop/suggestion/page.tsx @@ -36,9 +36,17 @@ const AUTO_DISMISS_MS = 3000; export default function SuggestionPage() { const [suggestion, setSuggestion] = useState(""); const [isLoading, setIsLoading] = useState(true); + const [isDesktop, setIsDesktop] = useState(true); const [error, setError] = useState(null); const abortRef = useRef(null); + useEffect(() => { + if (!window.electronAPI?.onAutocompleteContext) { + setIsDesktop(false); + setIsLoading(false); + } + }, []); + useEffect(() => { if (!error) return; const timer = setTimeout(() => { @@ -153,6 +161,16 @@ export default function SuggestionPage() { return cleanup; }, [fetchSuggestion]); + if (!isDesktop) { + return ( +
+ + This page is only available in the SurfSense desktop app. + +
+ ); + } + if (error) { return (
diff --git a/surfsense_web/app/desktop/suggestion/suggestion.css b/surfsense_web/app/desktop/suggestion/suggestion.css index 712d12618..62f4d2ea7 100644 --- a/surfsense_web/app/desktop/suggestion/suggestion.css +++ b/surfsense_web/app/desktop/suggestion/suggestion.css @@ -1,4 +1,5 @@ -html, body { +html:has(.suggestion-body), +body:has(.suggestion-body) { margin: 0 !important; padding: 0 !important; background: transparent !important;