fix: harden autocomplete endpoint security and error handling

2026-07-24 23:41:10 +02:00 · 2026-04-04 08:57:09 +02:00 · 2026-04-04 08:57:09 +02:00 · 18103417bb
commit 18103417bb
parent 46e8134b23
4 changed files with 64 additions and 14 deletions
--- a/surfsense_backend/app/routes/autocomplete_routes.py
+++ b/surfsense_backend/app/routes/autocomplete_routes.py
@ -1,18 +1,21 @@
-from fastapi import APIRouter, Depends
+from fastapi import APIRouter, Depends, HTTPException
 from fastapi.responses import StreamingResponse
-from pydantic import BaseModel
+from pydantic import BaseModel, Field
 from sqlalchemy.ext.asyncio import AsyncSession

 from app.db import User, get_async_session
 from app.services.new_streaming_service import VercelStreamingService
 from app.services.vision_autocomplete_service import stream_vision_autocomplete
 from app.users import current_active_user
+from app.utils.rbac import check_search_space_access

 router = APIRouter(prefix="/autocomplete", tags=["autocomplete"])

+MAX_SCREENSHOT_SIZE = 20 * 1024 * 1024  # cap on screenshot string length, in characters (20 Mi chars of base64, ~15 MiB decoded)
+

 class VisionAutocompleteRequest(BaseModel):
-    screenshot: str
+    screenshot: str = Field(..., max_length=MAX_SCREENSHOT_SIZE)
    search_space_id: int
    app_name: str = ""
    window_title: str = ""
@ -24,6 +27,8 @@ async def vision_autocomplete_stream(
    user: User = Depends(current_active_user),
    session: AsyncSession = Depends(get_async_session),
 ):
+    await check_search_space_access(session, user, body.search_space_id)
+
    return StreamingResponse(
        stream_vision_autocomplete(
            body.screenshot, body.search_space_id, session,
--- a/surfsense_backend/app/services/vision_autocomplete_service.py
+++ b/surfsense_backend/app/services/vision_autocomplete_service.py
@ -61,11 +61,21 @@ def _build_system_prompt(app_name: str, window_title: str, kb_context: str) -> s
    return prompt


+def _is_vision_unsupported_error(e: Exception) -> bool:
+    """Return True if str(e), lowercased, contains a known vision-unsupported phrase."""
+    msg = str(e).lower()
+    return "content must be a string" in msg or "does not support image" in msg
+
+
 async def _extract_query_from_screenshot(
    llm, screenshot_data_url: str,
    app_name: str = "", window_title: str = "",
 ) -> str | None:
-    """Ask the Vision LLM to describe what the user is working on."""
+    """Ask the Vision LLM to describe what the user is working on.
+
+    Raises vision-unsupported errors so the caller can return a
+    friendly message immediately instead of retrying with astream.
+    """
    if app_name:
        prompt_text = EXTRACT_QUERY_PROMPT_WITH_APP.format(
            app_name=app_name, window_title=window_title,
@ -83,6 +93,8 @@ async def _extract_query_from_screenshot(
        query = response.content.strip() if hasattr(response, "content") else ""
        return query if query else None
    except Exception as e:
+        if _is_vision_unsupported_error(e):
+            raise
        logger.warning(f"Failed to extract query from screenshot: {e}")
        return None

@ -140,6 +152,10 @@ async def stream_vision_autocomplete(
    3. Stream the final completion with screenshot + KB + app context
    """
    streaming = VercelStreamingService()
+    vision_error_msg = (
+        "The selected model does not support vision. "
+        "Please set a vision-capable model (e.g. GPT-4o, Gemini) in your search space settings."
+    )

    llm = await get_vision_llm(session, search_space_id)
    if not llm:
@ -149,9 +165,17 @@ async def stream_vision_autocomplete(
        return

    kb_context = ""
-    query = await _extract_query_from_screenshot(
-        llm, screenshot_data_url, app_name=app_name, window_title=window_title,
-    )
+    try:
+        query = await _extract_query_from_screenshot(
+            llm, screenshot_data_url, app_name=app_name, window_title=window_title,
+        )
+    except Exception as e:
+        logger.warning(f"Vision autocomplete: selected model does not support vision: {e}")
+        yield streaming.format_message_start()
+        yield streaming.format_error(vision_error_msg)
+        yield streaming.format_done()
+        return
+
    if query:
        kb_context = await _search_knowledge_base(session, search_space_id, query)

@ -171,10 +195,13 @@ async def stream_vision_autocomplete(
        ]),
    ]

+    text_started = False
+    text_id = ""
    try:
        yield streaming.format_message_start()
        text_id = streaming.generate_text_id()
        yield streaming.format_text_start(text_id)
+        text_started = True

        async for chunk in llm.astream(messages):
            token = chunk.content if hasattr(chunk, "content") else str(chunk)
@ -186,13 +213,12 @@ async def stream_vision_autocomplete(
        yield streaming.format_done()

    except Exception as e:
-        error_str = str(e).lower()
-        if "content must be a string" in error_str or "does not support image" in error_str:
+        if text_started:
+            yield streaming.format_text_end(text_id)
+
+        if _is_vision_unsupported_error(e):
            logger.warning(f"Vision autocomplete: selected model does not support vision: {e}")
-            yield streaming.format_error(
-                "The selected model does not support vision. "
-                "Please set a vision-capable model (e.g. GPT-4o, Gemini) in your search space settings."
-            )
+            yield streaming.format_error(vision_error_msg)
        else:
            logger.error(f"Vision autocomplete streaming error: {e}")
            yield streaming.format_error(str(e))
--- a/surfsense_web/app/desktop/suggestion/page.tsx
+++ b/surfsense_web/app/desktop/suggestion/page.tsx
@ -36,9 +36,17 @@ const AUTO_DISMISS_MS = 3000;
 export default function SuggestionPage() {
 	const [suggestion, setSuggestion] = useState("");
 	const [isLoading, setIsLoading] = useState(true);
+	const [isDesktop, setIsDesktop] = useState(true);
 	const [error, setError] = useState<string | null>(null);
 	const abortRef = useRef<AbortController | null>(null);

+	useEffect(() => {
+		if (!window.electronAPI?.onAutocompleteContext) {
+			setIsDesktop(false);
+			setIsLoading(false);
+		}
+	}, []);
+
 	useEffect(() => {
 		if (!error) return;
 		const timer = setTimeout(() => {
@ -153,6 +161,16 @@ export default function SuggestionPage() {
 		return cleanup;
 	}, [fetchSuggestion]);

+	if (!isDesktop) {
+		return (
+			<div className="suggestion-tooltip">
+				<span className="suggestion-error-text">
+					This page is only available in the SurfSense desktop app.
+				</span>
+			</div>
+		);
+	}
+
 	if (error) {
 		return (
 			<div className="suggestion-tooltip suggestion-error">
--- a/surfsense_web/app/desktop/suggestion/suggestion.css
+++ b/surfsense_web/app/desktop/suggestion/suggestion.css
@ -1,4 +1,5 @@
-html, body {
+html:has(.suggestion-body),
+body:has(.suggestion-body) {
  margin: 0 !important;
  padding: 0 !important;
  background: transparent !important;