diff --git a/surfsense_backend/app/routes/autocomplete_routes.py b/surfsense_backend/app/routes/autocomplete_routes.py
index 9a285a723..68c56d0e0 100644
--- a/surfsense_backend/app/routes/autocomplete_routes.py
+++ b/surfsense_backend/app/routes/autocomplete_routes.py
@@ -1,118 +1,14 @@
-import logging
-from typing import AsyncGenerator
-
 from fastapi import APIRouter, Depends, Query
 from fastapi.responses import StreamingResponse
-from langchain_core.messages import HumanMessage, SystemMessage
 from sqlalchemy.ext.asyncio import AsyncSession
 
 from app.db import User, get_async_session
-from app.retriever.chunks_hybrid_search import ChucksHybridSearchRetriever
-from app.services.llm_service import get_agent_llm
+from app.services.autocomplete_service import stream_autocomplete
 from app.services.new_streaming_service import VercelStreamingService
 from app.users import current_active_user
 
-logger = logging.getLogger(__name__)
-
 router = APIRouter(prefix="/autocomplete", tags=["autocomplete"])
 
-AUTOCOMPLETE_SYSTEM_PROMPT = """You are an inline text autocomplete engine. Your job is to complete the user's text naturally.
-
-Rules:
-- Output ONLY the continuation text. Do NOT repeat what the user already typed.
-- Keep completions concise: 1-3 sentences maximum.
-- Match the user's tone, style, and language.
-- If knowledge base context is provided, use it to make the completion factually accurate and personalized.
-- Do NOT add quotes, explanations, or meta-commentary.
-- Do NOT start with a space unless grammatically required.
-- If you cannot produce a useful completion, output nothing."""
-
-KB_CONTEXT_TEMPLATE = """
-Relevant knowledge base context (use this to personalize the completion):
----
-{kb_context}
----
-"""
-
-
-async def _stream_autocomplete(
-    text: str,
-    cursor_position: int,
-    search_space_id: int,
-    session: AsyncSession,
-) -> AsyncGenerator[str, None]:
-    """Stream an autocomplete response with KB context."""
-    streaming_service = VercelStreamingService()
-
-    try:
-        # Text before cursor is what we're completing
-        text_before_cursor = text[:cursor_position] if cursor_position >= 0 else text
-
-        if not text_before_cursor.strip():
-            yield streaming_service.format_message_start()
-            yield streaming_service.format_finish()
-            yield streaming_service.format_done()
-            return
-
-        # Fast KB lookup: vector-only search, top 3 chunks, no planner LLM
-        kb_context = ""
-        try:
-            retriever = ChucksHybridSearchRetriever(session)
-            chunks = await retriever.vector_search(
-                query_text=text_before_cursor[-200:],  # last 200 chars for relevance
-                top_k=3,
-                search_space_id=search_space_id,
-            )
-            if chunks:
-                kb_snippets = []
-                for chunk in chunks:
-                    content = getattr(chunk, "content", None) or getattr(chunk, "chunk_text", "")
-                    if content:
-                        kb_snippets.append(content[:300])
-                if kb_snippets:
-                    kb_context = KB_CONTEXT_TEMPLATE.format(
-                        kb_context="\n\n".join(kb_snippets)
-                    )
-        except Exception as e:
-            logger.warning(f"KB search failed for autocomplete, proceeding without context: {e}")
-
-        # Get the search space's configured LLM
-        llm = await get_agent_llm(session, search_space_id)
-        if not llm:
-            yield streaming_service.format_message_start()
-            error_msg = "No LLM configured for this search space"
-            yield streaming_service.format_error(error_msg)
-            yield streaming_service.format_done()
-            return
-
-        system_prompt = AUTOCOMPLETE_SYSTEM_PROMPT
-        if kb_context:
-            system_prompt += kb_context
-
-        messages = [
-            SystemMessage(content=system_prompt),
-            HumanMessage(content=f"Complete this text:\n{text_before_cursor}"),
-        ]
-
-        # Stream the response
-        yield streaming_service.format_message_start()
-        text_id = streaming_service.generate_text_id()
-        yield streaming_service.format_text_start(text_id)
-
-        async for chunk in llm.astream(messages):
-            token = chunk.content if hasattr(chunk, "content") else str(chunk)
-            if token:
-                yield streaming_service.format_text_delta(text_id, token)
-
-        yield streaming_service.format_text_end(text_id)
-        yield streaming_service.format_finish()
-        yield streaming_service.format_done()
-
-    except Exception as e:
-        logger.error(f"Autocomplete streaming error: {e}")
-        yield streaming_service.format_error(str(e))
-        yield streaming_service.format_done()
-
 
 @router.post("/stream")
 async def autocomplete_stream(
@@ -122,12 +18,11 @@ async def autocomplete_stream(
     user: User = Depends(current_active_user),
     session: AsyncSession = Depends(get_async_session),
 ):
-    """Stream an autocomplete suggestion based on the current text and KB context."""
     if cursor_position < 0:
         cursor_position = len(text)
 
     return StreamingResponse(
-        _stream_autocomplete(text, cursor_position, search_space_id, session),
+        stream_autocomplete(text, cursor_position, search_space_id, session),
         media_type="text/event-stream",
         headers={
             **VercelStreamingService.get_response_headers(),
diff --git a/surfsense_backend/app/services/autocomplete_service.py b/surfsense_backend/app/services/autocomplete_service.py
new file mode 100644
index 000000000..7c172275d
--- /dev/null
+++ b/surfsense_backend/app/services/autocomplete_service.py
@@ -0,0 +1,124 @@
+import logging
+from typing import AsyncGenerator
+
+from langchain_core.messages import HumanMessage, SystemMessage
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.retriever.chunks_hybrid_search import ChucksHybridSearchRetriever
+from app.services.llm_service import get_agent_llm
+from app.services.new_streaming_service import VercelStreamingService
+
+logger = logging.getLogger(__name__)
+
+SYSTEM_PROMPT = """You are an inline text autocomplete engine. Your job is to complete the user's text naturally.
+
+Rules:
+- Output ONLY the continuation text. Do NOT repeat what the user already typed.
+- Keep completions concise: 1-3 sentences maximum.
+- Match the user's tone, style, and language.
+- If knowledge base context is provided, use it to make the completion factually accurate and personalized.
+- Do NOT add quotes, explanations, or meta-commentary.
+- Do NOT start with a space unless grammatically required.
+- If you cannot produce a useful completion, output nothing."""
+
+KB_CONTEXT_TEMPLATE = """
+Relevant knowledge base context (use this to personalize the completion):
+---
+{kb_context}
+---
+"""
+
+
+async def _retrieve_kb_context(
+    session: AsyncSession,
+    text: str,
+    search_space_id: int,
+) -> str:
+    """Return a prompt fragment with knowledge-base snippets related to *text*.
+
+    Runs a vector search on the last 200 characters of *text*, asks for the
+    top three chunks and truncates each snippet to 300 characters. Returns an
+    empty string when nothing usable is found or when the search raises; the
+    failure is logged as a warning so autocomplete can proceed without context.
+    """
+    try:
+        retriever = ChucksHybridSearchRetriever(session)
+        chunks = await retriever.vector_search(
+            query_text=text[-200:],
+            top_k=3,
+            search_space_id=search_space_id,
+        )
+        if not chunks:
+            return ""
+        snippets = []
+        for chunk in chunks:
+            content = getattr(chunk, "content", None) or getattr(chunk, "chunk_text", "")
+            if content:
+                snippets.append(content[:300])
+        if not snippets:
+            return ""
+        return KB_CONTEXT_TEMPLATE.format(kb_context="\n\n".join(snippets))
+    except Exception as e:
+        logger.warning(f"KB search failed for autocomplete, proceeding without context: {e}")
+        return ""
+
+
+async def stream_autocomplete(
+    text: str,
+    cursor_position: int,
+    search_space_id: int,
+    session: AsyncSession,
+) -> AsyncGenerator[str, None]:
+    """Build context, call the LLM, and yield SSE-formatted tokens.
+
+    Only the text before ``cursor_position`` is completed (the whole text when
+    the position is negative). A blank prefix yields just the start, finish
+    and done frames; a missing LLM or any exception yields an error frame
+    followed by done.
+    """
+    streaming = VercelStreamingService()
+    text_before_cursor = text[:cursor_position] if cursor_position >= 0 else text
+
+    if not text_before_cursor.strip():
+        yield streaming.format_message_start()
+        yield streaming.format_finish()
+        yield streaming.format_done()
+        return
+
+    kb_context = await _retrieve_kb_context(session, text_before_cursor, search_space_id)
+
+    # The LLM lookup stays inside the try block so that a failure there is
+    # reported to the client as an error frame instead of aborting the stream.
+    try:
+        llm = await get_agent_llm(session, search_space_id)
+        if not llm:
+            yield streaming.format_message_start()
+            yield streaming.format_error("No LLM configured for this search space")
+            yield streaming.format_done()
+            return
+
+        system_prompt = SYSTEM_PROMPT
+        if kb_context:
+            system_prompt += kb_context
+
+        messages = [
+            SystemMessage(content=system_prompt),
+            HumanMessage(content=f"Complete this text:\n{text_before_cursor}"),
+        ]
+
+        yield streaming.format_message_start()
+        text_id = streaming.generate_text_id()
+        yield streaming.format_text_start(text_id)
+
+        async for chunk in llm.astream(messages):
+            token = chunk.content if hasattr(chunk, "content") else str(chunk)
+            if token:
+                yield streaming.format_text_delta(text_id, token)
+
+        yield streaming.format_text_end(text_id)
+        yield streaming.format_finish()
+        yield streaming.format_done()
+
+    except Exception as e:
+        logger.exception("Autocomplete streaming error: %s", e)
+        yield streaming.format_error(str(e))
+        yield streaming.format_done()
diff --git a/surfsense_desktop/src/modules/autocomplete/suggestion-window.ts b/surfsense_desktop/src/modules/autocomplete/suggestion-window.ts
index f03930cf6..e8a2f3a91 100644
--- a/surfsense_desktop/src/modules/autocomplete/suggestion-window.ts
+++ b/surfsense_desktop/src/modules/autocomplete/suggestion-window.ts
@@ -8,14 +8,22 @@ const MAX_HEIGHT = 400;
 
 let suggestionWindow: BrowserWindow | null = null;
 let resizeTimer: ReturnType | null = null;
+let cursorOrigin = { x: 0, y: 0 };
 
-function clampToScreen(x: number, y: number, w: number, h: number): { x: number; y: number } {
-  const display = screen.getDisplayNearestPoint({ x, y });
+const CURSOR_GAP = 20;
+
+function positionOnScreen(cursorX: number, cursorY: number, w: number, h: number): { x: number; y: number } {
+  const display = screen.getDisplayNearestPoint({ x: cursorX, y: cursorY });
   const { x: dx, y: dy, width: dw, height: dh } = display.workArea;
-  return {
-    x: Math.max(dx, Math.min(x, dx + dw - w)),
-    y: Math.max(dy, Math.min(y, dy + dh - h)),
-  };
+
+  const x = Math.max(dx, Math.min(cursorX, dx + dw - w));
+
+  const spaceBelow = (dy + dh) - (cursorY + CURSOR_GAP);
+  const y = spaceBelow >= h
+    ? cursorY + CURSOR_GAP
+    : cursorY - h - CURSOR_GAP;
+
+  return { x, y: Math.max(dy, y) };
 }
 
 function stopResizePolling(): void {
@@ -34,8 +42,8 @@ function startResizePolling(win: BrowserWindow): void {
       if (h > 0 && h !== lastH) {
         lastH = h;
         const clamped = Math.min(h, MAX_HEIGHT);
-        const bounds = win.getBounds();
-        win.setBounds({ x: bounds.x, y: bounds.y, width: TOOLTIP_WIDTH, height: clamped });
+        const pos = positionOnScreen(cursorOrigin.x, cursorOrigin.y, TOOLTIP_WIDTH, clamped);
+        win.setBounds({ x: pos.x, y: pos.y, width: TOOLTIP_WIDTH, height: clamped });
       }
     } catch {}
   }, 150);
@@ -55,8 +63,9 @@ export function destroySuggestion(): void {
 
 export function createSuggestionWindow(x: number, y: number): BrowserWindow {
   destroySuggestion();
+  cursorOrigin = { x, y };
 
-  const pos = clampToScreen(x, y + 20, TOOLTIP_WIDTH, TOOLTIP_HEIGHT);
+  const pos = positionOnScreen(x, y, TOOLTIP_WIDTH, TOOLTIP_HEIGHT);
 
   suggestionWindow = new BrowserWindow({
     width: TOOLTIP_WIDTH,