mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-06-08 20:25:19 +02:00
fix: harden autocomplete endpoint security and error handling
This commit is contained in:
parent
46e8134b23
commit
18103417bb
4 changed files with 64 additions and 14 deletions
|
|
@ -1,18 +1,21 @@
|
|||
from fastapi import APIRouter, Depends
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from fastapi.responses import StreamingResponse
|
||||
from pydantic import BaseModel
|
||||
from pydantic import BaseModel, Field
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.db import User, get_async_session
|
||||
from app.services.new_streaming_service import VercelStreamingService
|
||||
from app.services.vision_autocomplete_service import stream_vision_autocomplete
|
||||
from app.users import current_active_user
|
||||
from app.utils.rbac import check_search_space_access
|
||||
|
||||
router = APIRouter(prefix="/autocomplete", tags=["autocomplete"])
|
||||
|
||||
MAX_SCREENSHOT_SIZE = 20 * 1024 * 1024 # 20 MB base64 ceiling
|
||||
|
||||
|
||||
class VisionAutocompleteRequest(BaseModel):
|
||||
screenshot: str
|
||||
screenshot: str = Field(..., max_length=MAX_SCREENSHOT_SIZE)
|
||||
search_space_id: int
|
||||
app_name: str = ""
|
||||
window_title: str = ""
|
||||
|
|
@ -24,6 +27,8 @@ async def vision_autocomplete_stream(
|
|||
user: User = Depends(current_active_user),
|
||||
session: AsyncSession = Depends(get_async_session),
|
||||
):
|
||||
await check_search_space_access(session, user, body.search_space_id)
|
||||
|
||||
return StreamingResponse(
|
||||
stream_vision_autocomplete(
|
||||
body.screenshot, body.search_space_id, session,
|
||||
|
|
|
|||
|
|
@ -61,11 +61,21 @@ def _build_system_prompt(app_name: str, window_title: str, kb_context: str) -> s
|
|||
return prompt
|
||||
|
||||
|
||||
def _is_vision_unsupported_error(e: Exception) -> bool:
|
||||
"""Check if an exception indicates the model doesn't support vision/images."""
|
||||
msg = str(e).lower()
|
||||
return "content must be a string" in msg or "does not support image" in msg
|
||||
|
||||
|
||||
async def _extract_query_from_screenshot(
|
||||
llm, screenshot_data_url: str,
|
||||
app_name: str = "", window_title: str = "",
|
||||
) -> str | None:
|
||||
"""Ask the Vision LLM to describe what the user is working on."""
|
||||
"""Ask the Vision LLM to describe what the user is working on.
|
||||
|
||||
Raises vision-unsupported errors so the caller can return a
|
||||
friendly message immediately instead of retrying with astream.
|
||||
"""
|
||||
if app_name:
|
||||
prompt_text = EXTRACT_QUERY_PROMPT_WITH_APP.format(
|
||||
app_name=app_name, window_title=window_title,
|
||||
|
|
@ -83,6 +93,8 @@ async def _extract_query_from_screenshot(
|
|||
query = response.content.strip() if hasattr(response, "content") else ""
|
||||
return query if query else None
|
||||
except Exception as e:
|
||||
if _is_vision_unsupported_error(e):
|
||||
raise
|
||||
logger.warning(f"Failed to extract query from screenshot: {e}")
|
||||
return None
|
||||
|
||||
|
|
@ -140,6 +152,10 @@ async def stream_vision_autocomplete(
|
|||
3. Stream the final completion with screenshot + KB + app context
|
||||
"""
|
||||
streaming = VercelStreamingService()
|
||||
vision_error_msg = (
|
||||
"The selected model does not support vision. "
|
||||
"Please set a vision-capable model (e.g. GPT-4o, Gemini) in your search space settings."
|
||||
)
|
||||
|
||||
llm = await get_vision_llm(session, search_space_id)
|
||||
if not llm:
|
||||
|
|
@ -149,9 +165,17 @@ async def stream_vision_autocomplete(
|
|||
return
|
||||
|
||||
kb_context = ""
|
||||
query = await _extract_query_from_screenshot(
|
||||
llm, screenshot_data_url, app_name=app_name, window_title=window_title,
|
||||
)
|
||||
try:
|
||||
query = await _extract_query_from_screenshot(
|
||||
llm, screenshot_data_url, app_name=app_name, window_title=window_title,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(f"Vision autocomplete: selected model does not support vision: {e}")
|
||||
yield streaming.format_message_start()
|
||||
yield streaming.format_error(vision_error_msg)
|
||||
yield streaming.format_done()
|
||||
return
|
||||
|
||||
if query:
|
||||
kb_context = await _search_knowledge_base(session, search_space_id, query)
|
||||
|
||||
|
|
@ -171,10 +195,13 @@ async def stream_vision_autocomplete(
|
|||
]),
|
||||
]
|
||||
|
||||
text_started = False
|
||||
text_id = ""
|
||||
try:
|
||||
yield streaming.format_message_start()
|
||||
text_id = streaming.generate_text_id()
|
||||
yield streaming.format_text_start(text_id)
|
||||
text_started = True
|
||||
|
||||
async for chunk in llm.astream(messages):
|
||||
token = chunk.content if hasattr(chunk, "content") else str(chunk)
|
||||
|
|
@ -186,13 +213,12 @@ async def stream_vision_autocomplete(
|
|||
yield streaming.format_done()
|
||||
|
||||
except Exception as e:
|
||||
error_str = str(e).lower()
|
||||
if "content must be a string" in error_str or "does not support image" in error_str:
|
||||
if text_started:
|
||||
yield streaming.format_text_end(text_id)
|
||||
|
||||
if _is_vision_unsupported_error(e):
|
||||
logger.warning(f"Vision autocomplete: selected model does not support vision: {e}")
|
||||
yield streaming.format_error(
|
||||
"The selected model does not support vision. "
|
||||
"Please set a vision-capable model (e.g. GPT-4o, Gemini) in your search space settings."
|
||||
)
|
||||
yield streaming.format_error(vision_error_msg)
|
||||
else:
|
||||
logger.error(f"Vision autocomplete streaming error: {e}")
|
||||
yield streaming.format_error(str(e))
|
||||
|
|
|
|||
|
|
@ -36,9 +36,17 @@ const AUTO_DISMISS_MS = 3000;
|
|||
export default function SuggestionPage() {
|
||||
const [suggestion, setSuggestion] = useState("");
|
||||
const [isLoading, setIsLoading] = useState(true);
|
||||
const [isDesktop, setIsDesktop] = useState(true);
|
||||
const [error, setError] = useState<string | null>(null);
|
||||
const abortRef = useRef<AbortController | null>(null);
|
||||
|
||||
useEffect(() => {
|
||||
if (!window.electronAPI?.onAutocompleteContext) {
|
||||
setIsDesktop(false);
|
||||
setIsLoading(false);
|
||||
}
|
||||
}, []);
|
||||
|
||||
useEffect(() => {
|
||||
if (!error) return;
|
||||
const timer = setTimeout(() => {
|
||||
|
|
@ -153,6 +161,16 @@ export default function SuggestionPage() {
|
|||
return cleanup;
|
||||
}, [fetchSuggestion]);
|
||||
|
||||
if (!isDesktop) {
|
||||
return (
|
||||
<div className="suggestion-tooltip">
|
||||
<span className="suggestion-error-text">
|
||||
This page is only available in the SurfSense desktop app.
|
||||
</span>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
if (error) {
|
||||
return (
|
||||
<div className="suggestion-tooltip suggestion-error">
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
html, body {
|
||||
html:has(.suggestion-body),
|
||||
body:has(.suggestion-body) {
|
||||
margin: 0 !important;
|
||||
padding: 0 !important;
|
||||
background: transparent !important;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue