refactor: extract autocomplete service and fix tooltip screen-edge positioning

2026-04-26 01:06:23 +02:00 · 2026-04-02 20:38:09 +02:00 · 2026-04-02 20:38:09 +02:00 · 3e68d4aa3e
commit 3e68d4aa3e
parent 9c1d9357c4
3 changed files with 130 additions and 116 deletions
--- a/surfsense_backend/app/routes/autocomplete_routes.py
+++ b/surfsense_backend/app/routes/autocomplete_routes.py
@ -1,118 +1,14 @@
 import logging
 from typing import AsyncGenerator
 from fastapi import APIRouter, Depends, Query
 from fastapi.responses import StreamingResponse
 from langchain_core.messages import HumanMessage, SystemMessage
 from sqlalchemy.ext.asyncio import AsyncSession
 from app.db import User, get_async_session
-from app.retriever.chunks_hybrid_search import ChucksHybridSearchRetriever
+from app.services.autocomplete_service import stream_autocomplete
 from app.services.llm_service import get_agent_llm
 from app.services.new_streaming_service import VercelStreamingService
 from app.users import current_active_user
 logger = logging.getLogger(__name__)
 router = APIRouter(prefix="/autocomplete", tags=["autocomplete"])
 AUTOCOMPLETE_SYSTEM_PROMPT = """You are an inline text autocomplete engine. Your job is to complete the user's text naturally.
 Rules:
 - Output ONLY the continuation text. Do NOT repeat what the user already typed.
 - Keep completions concise: 1-3 sentences maximum.
 - Match the user's tone, style, and language.
 - If knowledge base context is provided, use it to make the completion factually accurate and personalized.
 - Do NOT add quotes, explanations, or meta-commentary.
 - Do NOT start with a space unless grammatically required.
 - If you cannot produce a useful completion, output nothing."""
 KB_CONTEXT_TEMPLATE = """
 Relevant knowledge base context (use this to personalize the completion):
 ---
 {kb_context}
 ---
 """
 async def _stream_autocomplete(
    text: str,
    cursor_position: int,
    search_space_id: int,
    session: AsyncSession,
 ) -> AsyncGenerator[str, None]:
    """Stream an autocomplete response with KB context.

    Completes the part of ``text`` that precedes ``cursor_position`` (the
    whole text when the position is negative). Up to three knowledge-base
    chunks are looked up and appended to the system prompt, then the LLM
    configured for ``search_space_id`` is streamed token by token.

    Yields:
        Strings produced by the ``VercelStreamingService`` formatters:
        message start, text start/delta/end, finish and done. Blank input
        yields only start/finish/done; a missing LLM or any exception
        yields an error message followed by done.
    """
    streaming_service = VercelStreamingService()
    try:
        # Text before cursor is what we're completing
        text_before_cursor = text[:cursor_position] if cursor_position >= 0 else text
        # Nothing but whitespace to complete: emit an empty, well-formed stream.
        if not text_before_cursor.strip():
            yield streaming_service.format_message_start()
            yield streaming_service.format_finish()
            yield streaming_service.format_done()
            return
        # Fast KB lookup: vector-only search, top 3 chunks, no planner LLM
        kb_context = ""
        try:
            retriever = ChucksHybridSearchRetriever(session)
            chunks = await retriever.vector_search(
                query_text=text_before_cursor[-200:],  # last 200 chars for relevance
                top_k=3,
                search_space_id=search_space_id,
            )
            if chunks:
                kb_snippets = []
                for chunk in chunks:
                    # Prefer `content`; fall back to `chunk_text` when it is missing or empty.
                    content = getattr(chunk, "content", None) or getattr(chunk, "chunk_text", "")
                    if content:
                        # Cap each snippet at 300 characters.
                        kb_snippets.append(content[:300])
                if kb_snippets:
                    kb_context = KB_CONTEXT_TEMPLATE.format(
                        kb_context="\n\n".join(kb_snippets)
                    )
        except Exception as e:
            # Best effort: a failed KB lookup is logged and the completion runs without context.
            logger.warning(f"KB search failed for autocomplete, proceeding without context: {e}")
        # Get the search space's configured LLM
        llm = await get_agent_llm(session, search_space_id)
        if not llm:
            yield streaming_service.format_message_start()
            error_msg = "No LLM configured for this search space"
            yield streaming_service.format_error(error_msg)
            yield streaming_service.format_done()
            return
        system_prompt = AUTOCOMPLETE_SYSTEM_PROMPT
        if kb_context:
            system_prompt += kb_context
        messages = [
            SystemMessage(content=system_prompt),
            HumanMessage(content=f"Complete this text:\n{text_before_cursor}"),
        ]
        # Stream the response
        yield streaming_service.format_message_start()
        text_id = streaming_service.generate_text_id()
        yield streaming_service.format_text_start(text_id)
        async for chunk in llm.astream(messages):
            # Use the chunk's `content` when it has one, otherwise its string form.
            token = chunk.content if hasattr(chunk, "content") else str(chunk)
            # Empty tokens are not forwarded.
            if token:
                yield streaming_service.format_text_delta(text_id, token)
        yield streaming_service.format_text_end(text_id)
        yield streaming_service.format_finish()
        yield streaming_service.format_done()
    except Exception as e:
        # Any failure above is reported in-band as an error message, then the stream is closed.
        logger.error(f"Autocomplete streaming error: {e}")
        yield streaming_service.format_error(str(e))
        yield streaming_service.format_done()
@router.post("/stream")
 async def autocomplete_stream(
@ -122,12 +18,11 @@ async def autocomplete_stream(
    user: User = Depends(current_active_user),
    session: AsyncSession = Depends(get_async_session),
 ):
    """Stream an autocomplete suggestion based on the current text and KB context."""
    if cursor_position < 0:
        cursor_position = len(text)
    return StreamingResponse(
-        _stream_autocomplete(text, cursor_position, search_space_id, session),
+        stream_autocomplete(text, cursor_position, search_space_id, session),
        media_type="text/event-stream",
        headers={
            **VercelStreamingService.get_response_headers(),
--- a/surfsense_backend/app/services/autocomplete_service.py
+++ b/surfsense_backend/app/services/autocomplete_service.py
@ -0,0 +1,110 @@
 import logging
 from typing import AsyncGenerator
 from langchain_core.messages import HumanMessage, SystemMessage
 from sqlalchemy.ext.asyncio import AsyncSession
 from app.retriever.chunks_hybrid_search import ChucksHybridSearchRetriever
 from app.services.llm_service import get_agent_llm
 from app.services.new_streaming_service import VercelStreamingService
 logger = logging.getLogger(__name__)
 SYSTEM_PROMPT = """You are an inline text autocomplete engine. Your job is to complete the user's text naturally.
 Rules:
 - Output ONLY the continuation text. Do NOT repeat what the user already typed.
 - Keep completions concise: 1-3 sentences maximum.
 - Match the user's tone, style, and language.
 - If knowledge base context is provided, use it to make the completion factually accurate and personalized.
 - Do NOT add quotes, explanations, or meta-commentary.
 - Do NOT start with a space unless grammatically required.
 - If you cannot produce a useful completion, output nothing."""
 KB_CONTEXT_TEMPLATE = """
 Relevant knowledge base context (use this to personalize the completion):
 ---
 {kb_context}
 ---
 """
 async def _retrieve_kb_context(
    session: AsyncSession,
    text: str,
    search_space_id: int,
 ) -> str:
    """Return a prompt fragment built from knowledge-base chunks, or ``""``.

    Runs a vector search (top 3 results) in ``search_space_id`` using the
    last 200 characters of ``text`` as the query. Each hit contributes at
    most 300 characters, taken from its ``content`` attribute or, failing
    that, its ``chunk_text`` attribute. The excerpts are joined with blank
    lines and rendered through ``KB_CONTEXT_TEMPLATE``.

    The lookup is best effort: when nothing usable is found, or when any
    exception is raised, a warning is logged (for the exception case) and
    an empty string is returned.
    """
    try:
        hits = await ChucksHybridSearchRetriever(session).vector_search(
            query_text=text[-200:],
            top_k=3,
            search_space_id=search_space_id,
        )
        # A falsy result (None, empty list) is treated as "no hits".
        bodies = (
            getattr(hit, "content", None) or getattr(hit, "chunk_text", "")
            for hit in hits or ()
        )
        excerpts = [body[:300] for body in bodies if body]
        if excerpts:
            return KB_CONTEXT_TEMPLATE.format(kb_context="\n\n".join(excerpts))
        return ""
    except Exception as e:
        logger.warning(f"KB search failed for autocomplete, proceeding without context: {e}")
        return ""
 async def stream_autocomplete(
    text: str,
    cursor_position: int,
    search_space_id: int,
    session: AsyncSession,
 ) -> AsyncGenerator[str, None]:
    """Build context, call the LLM, and yield SSE-formatted tokens.

    Args:
        text: Full text of the field being edited.
        cursor_position: Index of the cursor in ``text``; only the text
            before it is completed. A negative value means "use all of
            ``text``".
        search_space_id: Search space whose knowledge base and configured
            LLM are used.
        session: Database session passed to the retriever and LLM lookup.

    Yields:
        Strings produced by the ``VercelStreamingService`` formatters:
        message start, text start/delta/end, finish and done. Blank input
        yields only start/finish/done. A missing LLM, or an exception in
        any step, yields an error message followed by done.
    """
    streaming = VercelStreamingService()
    # The whole pipeline runs inside one try block: once the response has
    # started, an exception that escapes this generator (for example from
    # the LLM lookup) would cut the stream off without an error or done
    # message, so every failure is reported in-band instead.
    try:
        text_before_cursor = text[:cursor_position] if cursor_position >= 0 else text
        # Nothing but whitespace to complete: emit an empty, well-formed stream.
        if not text_before_cursor.strip():
            yield streaming.format_message_start()
            yield streaming.format_finish()
            yield streaming.format_done()
            return
        kb_context = await _retrieve_kb_context(session, text_before_cursor, search_space_id)
        llm = await get_agent_llm(session, search_space_id)
        if not llm:
            yield streaming.format_message_start()
            yield streaming.format_error("No LLM configured for this search space")
            yield streaming.format_done()
            return
        system_prompt = SYSTEM_PROMPT
        if kb_context:
            system_prompt += kb_context
        messages = [
            SystemMessage(content=system_prompt),
            HumanMessage(content=f"Complete this text:\n{text_before_cursor}"),
        ]
        yield streaming.format_message_start()
        text_id = streaming.generate_text_id()
        yield streaming.format_text_start(text_id)
        async for chunk in llm.astream(messages):
            # Use the chunk's `content` when it has one, otherwise its string form.
            token = chunk.content if hasattr(chunk, "content") else str(chunk)
            # Empty tokens are not forwarded.
            if token:
                yield streaming.format_text_delta(text_id, token)
        yield streaming.format_text_end(text_id)
        yield streaming.format_finish()
        yield streaming.format_done()
    except Exception as e:
        # logger.exception keeps the traceback, which a bare error message loses.
        logger.exception("Autocomplete streaming error: %s", e)
        yield streaming.format_error(str(e))
        yield streaming.format_done()
--- a/surfsense_desktop/src/modules/autocomplete/suggestion-window.ts
+++ b/surfsense_desktop/src/modules/autocomplete/suggestion-window.ts
@ -8,14 +8,22 @@ const MAX_HEIGHT = 400;
 let suggestionWindow: BrowserWindow | null = null;
 let resizeTimer: ReturnType<typeof setInterval> | null = null;
 let cursorOrigin = { x: 0, y: 0 };
-function clampToScreen(x: number, y: number, w: number, h: number): { x: number; y: number } {
+const CURSOR_GAP = 20;
-  const display = screen.getDisplayNearestPoint({ x, y });
+
 function positionOnScreen(cursorX: number, cursorY: number, w: number, h: number): { x: number; y: number } {
  // Choose the top-left corner for a w-by-h window near the cursor, using the
  // work area of the display nearest to the cursor. x is clamped so the window
  // stays inside the work area horizontally.
  const display = screen.getDisplayNearestPoint({ x: cursorX, y: cursorY });
  const { x: dx, y: dy, width: dw, height: dh } = display.workArea;
-  return {
+
-    x: Math.max(dx, Math.min(x, dx + dw - w)),
+  const x = Math.max(dx, Math.min(cursorX, dx + dw - w));
-    y: Math.max(dy, Math.min(y, dy + dh - h)),
+
-  };
+  const spaceBelow = (dy + dh) - (cursorY + CURSOR_GAP);
  // Sit CURSOR_GAP below the cursor when the window fits there; otherwise sit
  // CURSOR_GAP above it.
  const y = spaceBelow >= h
    ? cursorY + CURSOR_GAP
    : cursorY - h - CURSOR_GAP;
  // Never place the window above the top of the work area.
  return { x, y: Math.max(dy, y) };
 }
 function stopResizePolling(): void {
@ -34,8 +42,8 @@ function startResizePolling(win: BrowserWindow): void {
      if (h > 0 && h !== lastH) {
        lastH = h;
        const clamped = Math.min(h, MAX_HEIGHT);
-        const bounds = win.getBounds();
+        const pos = positionOnScreen(cursorOrigin.x, cursorOrigin.y, TOOLTIP_WIDTH, clamped);
-        win.setBounds({ x: bounds.x, y: bounds.y, width: TOOLTIP_WIDTH, height: clamped });
+        win.setBounds({ x: pos.x, y: pos.y, width: TOOLTIP_WIDTH, height: clamped });
      }
    } catch {}
  }, 150);
@ -55,8 +63,9 @@ export function destroySuggestion(): void {
 export function createSuggestionWindow(x: number, y: number): BrowserWindow {
  destroySuggestion();
  cursorOrigin = { x, y };
-  const pos = clampToScreen(x, y + 20, TOOLTIP_WIDTH, TOOLTIP_HEIGHT);
+  const pos = positionOnScreen(x, y, TOOLTIP_WIDTH, TOOLTIP_HEIGHT);
  suggestionWindow = new BrowserWindow({
    width: TOOLTIP_WIDTH,