feat: enhance vision autocomplete service and UI feedback

- Optimized the vision autocomplete service by starting the SSE stream immediately and deriving KB search queries directly from window titles.
- Refactored the service to run KB filesystem pre-computation and agent graph compilation in parallel, improving performance.
- Updated the SuggestionPage component to handle new agent step data, displaying progress indicators for each step.
- Enhanced the CSS for the suggestion tooltip and agent activity indicators, improving the user interface and experience.
2026-06-26 21:39:43 +02:00 · 2026-04-07 02:49:24 -07:00 · 2026-04-07 02:49:24 -07:00 · bb1dcd32b6
commit bb1dcd32b6
parent 49441233e7
6 changed files with 686 additions and 228 deletions
--- a/surfsense_backend/app/agents/autocomplete/init.py
+++ b/surfsense_backend/app/agents/autocomplete/init.py
@ -0,0 +1,11 @@
 """Agent-based vision autocomplete with scoped filesystem exploration."""
 from app.agents.autocomplete.autocomplete_agent import (
    create_autocomplete_agent,
    stream_autocomplete_agent,
 )
 __all__ = [
    "create_autocomplete_agent",
    "stream_autocomplete_agent",
 ]
--- a/surfsense_backend/app/agents/autocomplete/autocomplete_agent.py
+++ b/surfsense_backend/app/agents/autocomplete/autocomplete_agent.py
@ -0,0 +1,429 @@
 """Vision autocomplete agent with scoped filesystem exploration.
 Converts the stateless single-shot vision autocomplete into an agent that
 seeds a virtual filesystem from KB search results and lets the vision LLM
 explore documents via ``ls``, ``read_file``, ``glob``, ``grep``, etc.
 before generating the final completion.
 Performance: KB search and agent graph compilation run in parallel so
 the only sequential latency is KB-search (or agent compile, whichever is
 slower) + the agent's LLM turns.  There is no separate "query extraction"
 LLM call — the window title is used directly as the KB search query.
 """
 from __future__ import annotations
 import asyncio
 import logging
 import uuid
 from typing import Any, AsyncGenerator
 from deepagents.graph import BASE_AGENT_PROMPT
 from deepagents.middleware.patch_tool_calls import PatchToolCallsMiddleware
 from langchain.agents import create_agent
 from langchain_anthropic.middleware import AnthropicPromptCachingMiddleware
 from langchain_core.language_models import BaseChatModel
 from langchain_core.messages import AIMessage, ToolMessage
 from app.agents.new_chat.middleware.filesystem import SurfSenseFilesystemMiddleware
 from app.agents.new_chat.middleware.knowledge_search import (
    build_scoped_filesystem,
    search_knowledge_base,
 )
 from app.services.new_streaming_service import VercelStreamingService
 logger = logging.getLogger(__name__)
 # Default ``top_k`` passed to the knowledge-base search in
 # ``precompute_kb_filesystem``.
 KB_TOP_K = 10
 # ---------------------------------------------------------------------------
 # System prompt
 # ---------------------------------------------------------------------------
 AUTOCOMPLETE_SYSTEM_PROMPT = """You are a smart writing assistant that analyzes the user's screen to draft or complete text.
 You will receive a screenshot of the user's screen. Your PRIMARY source of truth is the screenshot itself — the visual context determines what to write.
 Your job:
 1. Analyze the ENTIRE screenshot to understand what the user is working on (email thread, chat conversation, document, code editor, form, etc.).
 2. Identify the text area where the user will type.
 3. Generate the text the user most likely wants to write based on the visual context.
 You also have access to the user's knowledge base documents via filesystem tools. However:
 - ONLY consult the knowledge base if the screenshot clearly involves a topic where your KB documents are DIRECTLY relevant (e.g., the user is writing about a specific project/topic that matches a document title).
 - Do NOT explore documents just because they exist. Most autocomplete requests can be answered purely from the screenshot.
 - If you do read a document, only incorporate information that is 100% relevant to what the user is typing RIGHT NOW. Do not add extra details, background, or tangential information from the KB.
 - Keep your output SHORT — autocomplete should feel like a natural continuation, not an essay.
 Key behavior:
 - If the text area is EMPTY, draft a concise response or message based on what you see on screen (e.g., reply to an email, respond to a chat message, continue a document).
 - If the text area already has text, continue it naturally — typically just a sentence or two.
 Rules:
 - Output ONLY the text to be inserted. No quotes, no explanations, no meta-commentary.
 - Be CONCISE. Prefer a single paragraph or a few sentences. Autocomplete is a quick assist, not a full draft.
 - Match the tone and formality of the surrounding context.
 - If the screen shows code, write code. If it shows a casual chat, be casual. If it shows a formal email, be formal.
 - Do NOT describe the screenshot or explain your reasoning.
 - Do NOT cite or reference documents explicitly — just let the knowledge inform your writing naturally.
 - If you cannot determine what to write, output nothing.
 ## Filesystem Tools `ls`, `read_file`, `write_file`, `edit_file`, `glob`, `grep`
 All file paths must start with a `/`.
 - ls: list files and directories at a given path.
 - read_file: read a file from the filesystem.
 - write_file: create a temporary file in the session (not persisted).
 - edit_file: edit a file in the session (not persisted for /documents/ files).
 - glob: find files matching a pattern (e.g., "**/*.xml").
 - grep: search for text within files.
 ## When to Use Filesystem Tools
 BEFORE reaching for any tool, ask yourself: "Can I write a good completion purely from the screenshot?" If yes, just write it — do NOT explore the KB.
 Only use tools when:
 - The user is clearly writing about a specific topic that likely has detailed information in their KB.
 - You need a specific fact, name, number, or reference that the screenshot doesn't provide.
 When you do use tools, be surgical:
 - Check the `ls` output first. If no document title looks relevant, stop — do not read files just to see what's there.
 - If a title looks relevant, read only the `<chunk_index>` (first ~20 lines) and jump to matched chunks. Do not read entire documents.
 - Extract only the specific information you need and move on to generating the completion.
 ## Reading Documents Efficiently
 Documents are formatted as XML. Each document contains:
 - `<document_metadata>` — title, type, URL, etc.
 - `<chunk_index>` — a table of every chunk with its **line range** and a
  `matched="true"` flag for chunks that matched the search query.
 - `<document_content>` — the actual chunks in original document order.
 **Workflow**: read the first ~20 lines to see the `<chunk_index>`, identify
 chunks marked `matched="true"`, then use `read_file(path, offset=<start_line>,
 limit=<lines>)` to jump directly to those sections."""
 # Template appended to the system prompt when an application name is known;
 # formatted with ``app_name`` and ``window_title``.
 APP_CONTEXT_BLOCK = """
 The user is currently working in "{app_name}" (window: "{window_title}"). Use this to understand the type of application and adapt your tone and format accordingly."""
 def _build_autocomplete_system_prompt(app_name: str, window_title: str) -> str:
    """Return the autocomplete system prompt, adding app context when known.

    The application/window context block is appended only when ``app_name``
    is non-empty; otherwise the base prompt is returned unchanged.
    """
    if not app_name:
        return AUTOCOMPLETE_SYSTEM_PROMPT
    app_context = APP_CONTEXT_BLOCK.format(
        app_name=app_name,
        window_title=window_title,
    )
    return AUTOCOMPLETE_SYSTEM_PROMPT + app_context
 # ---------------------------------------------------------------------------
 # Pre-compute KB filesystem (runs in parallel with agent compilation)
 # ---------------------------------------------------------------------------
 class _KBResult:
    """Holds the outcome of one KB pre-computation pass.

    Carries the scoped filesystem mapping together with the synthetic ``ls``
    AI/tool message pair. All three fields default to ``None``.
    """
    __slots__ = ("files", "ls_ai_msg", "ls_tool_msg")
    def __init__(
        self,
        files: dict[str, Any] | None = None,
        ls_ai_msg: AIMessage | None = None,
        ls_tool_msg: ToolMessage | None = None,
    ) -> None:
        self.files, self.ls_ai_msg, self.ls_tool_msg = files, ls_ai_msg, ls_tool_msg
    @property
    def has_documents(self) -> bool:
        """True when ``files`` is set and non-empty."""
        if self.files:
            return True
        return False
 async def precompute_kb_filesystem(
    search_space_id: int,
    query: str,
    top_k: int = KB_TOP_K,
 ) -> _KBResult:
    """Run the KB search and assemble the scoped filesystem ahead of the agent.

    Intended to be awaited concurrently with agent graph compilation (for
    example through ``asyncio.gather``). An empty query, an empty search
    result, an empty filesystem, or any exception all yield an empty
    ``_KBResult`` so the caller can continue without KB context.
    """
    if not query:
        return _KBResult()
    try:
        hits = await search_knowledge_base(
            query=query,
            search_space_id=search_space_id,
            top_k=top_k,
        )
        if not hits:
            return _KBResult()
        scoped_files, _ = await build_scoped_filesystem(
            documents=hits,
            search_space_id=search_space_id,
        )
        if not scoped_files:
            return _KBResult()
        # Only populated entries under /documents/ appear in the synthetic listing.
        listed_paths = []
        for path, entry in scoped_files.items():
            if path.startswith("/documents/") and entry is not None:
                listed_paths.append(path)
        # Fabricate an ``ls`` call/response pair sharing one tool-call id so the
        # agent starts with the document listing already in its history.
        call_id = f"auto_ls_{uuid.uuid4().hex[:12]}"
        ls_request = AIMessage(
            content="",
            tool_calls=[{"name": "ls", "args": {"path": "/documents"}, "id": call_id}],
        )
        listing = str(listed_paths) if listed_paths else "No documents found."
        ls_response = ToolMessage(content=listing, tool_call_id=call_id)
        return _KBResult(files=scoped_files, ls_ai_msg=ls_request, ls_tool_msg=ls_response)
    except Exception:
        # Best effort: autocomplete still works from the screenshot alone.
        logger.warning("KB pre-computation failed, proceeding without KB", exc_info=True)
        return _KBResult()
 # ---------------------------------------------------------------------------
 # Filesystem middleware — no save_document, no persistence
 # ---------------------------------------------------------------------------
 class AutocompleteFilesystemMiddleware(SurfSenseFilesystemMiddleware):
    """Filesystem middleware variant used by the autocomplete agent.

    Constructs the parent with ``search_space_id=None`` and
    ``created_by_id=None`` and removes the ``save_document`` tool from the
    inherited tool list; every other inherited tool is kept.
    """
    def __init__(self) -> None:
        super().__init__(search_space_id=None, created_by_id=None)
        kept_tools = []
        for tool in self.tools:
            if tool.name != "save_document":
                kept_tools.append(tool)
        self.tools = kept_tools
 # ---------------------------------------------------------------------------
 # Agent factory
 # ---------------------------------------------------------------------------
 async def _compile_agent(
    llm: BaseChatModel,
    app_name: str,
    window_title: str,
 ) -> Any:
    """Build the autocomplete agent graph without blocking the event loop.

    ``create_agent`` is invoked through ``asyncio.to_thread``. The system
    prompt is the autocomplete prompt followed by ``BASE_AGENT_PROMPT``, and
    the returned runnable is configured with a ``recursion_limit`` of 200.
    """
    prompt_sections = [
        _build_autocomplete_system_prompt(app_name, window_title),
        BASE_AGENT_PROMPT,
    ]
    graph = await asyncio.to_thread(
        create_agent,
        llm,
        system_prompt="\n\n".join(prompt_sections),
        tools=[],
        middleware=[
            AutocompleteFilesystemMiddleware(),
            PatchToolCallsMiddleware(),
            AnthropicPromptCachingMiddleware(unsupported_model_behavior="ignore"),
        ],
    )
    return graph.with_config({"recursion_limit": 200})
 async def create_autocomplete_agent(
    llm: BaseChatModel,
    *,
    search_space_id: int,
    kb_query: str,
    app_name: str = "",
    window_title: str = "",
 ) -> tuple[Any, _KBResult]:
    """Compile the agent and pre-compute the KB filesystem concurrently.

    Both coroutines are awaited together with ``asyncio.gather``. The result
    is ``(agent, kb_result)`` so the caller can seed the agent's initial
    state with the pre-computed filesystem.
    """
    compile_coro = _compile_agent(llm, app_name, window_title)
    kb_coro = precompute_kb_filesystem(search_space_id, kb_query)
    results = await asyncio.gather(compile_coro, kb_coro)
    return results[0], results[1]
 # ---------------------------------------------------------------------------
 # Streaming helper
 # ---------------------------------------------------------------------------
 async def stream_autocomplete_agent(
    agent: Any,
    input_data: dict[str, Any],
    streaming_service: VercelStreamingService,
    *,
    emit_message_start: bool = True,
 ) -> AsyncGenerator[str, None]:
    """Stream agent events as Vercel SSE, with thinking steps for tool calls.

    Iterates ``agent.astream_events(..., version="v2")`` and translates three
    event types into SSE payloads produced by ``streaming_service``:

    - ``on_chat_model_stream``: string content becomes text deltas.
    - ``on_tool_start``: opens an "in_progress" thinking step.
    - ``on_tool_end``: marks the matching thinking step "complete".

    When ``emit_message_start`` is False the caller has already sent the
    ``message_start`` event (e.g. to show preparation steps before the agent
    runs).

    Exceptions raised while streaming are logged and converted into an error
    event followed by a done event; they are not re-raised.
    """
    # Fresh thread id per call, passed to the agent through ``configurable``.
    thread_id = uuid.uuid4().hex
    config = {"configurable": {"thread_id": thread_id}}
    # Id of the text part currently being streamed; None when no part is open.
    current_text_id: str | None = None
    # Number of tools currently running; model output is ignored while > 0.
    active_tool_depth = 0
    thinking_step_counter = 0
    # run_id of a tool invocation -> thinking-step id opened for it.
    tool_step_ids: dict[str, str] = {}
    # thinking-step id -> title, so the "complete" event repeats the same title.
    step_titles: dict[str, str] = {}
    completed_step_ids: set[str] = set()
    # Most recently opened step that has not been completed yet.
    last_active_step_id: str | None = None
    def next_thinking_step_id() -> str:
        # Sequential ids: autocomplete-step-1, autocomplete-step-2, ...
        nonlocal thinking_step_counter
        thinking_step_counter += 1
        return f"autocomplete-step-{thinking_step_counter}"
    def complete_current_step() -> str | None:
        # Returns the "complete" event for the active step, or None when there
        # is no active step or it was already completed.
        nonlocal last_active_step_id
        if last_active_step_id and last_active_step_id not in completed_step_ids:
            completed_step_ids.add(last_active_step_id)
            title = step_titles.get(last_active_step_id, "Done")
            event = streaming_service.format_thinking_step(
                step_id=last_active_step_id,
                title=title,
                status="complete",
            )
            last_active_step_id = None
            return event
        return None
    if emit_message_start:
        yield streaming_service.format_message_start()
    # Emit an initial "Generating completion" step so the UI immediately
    # shows activity once the agent starts its first LLM call.
    gen_step_id = next_thinking_step_id()
    last_active_step_id = gen_step_id
    step_titles[gen_step_id] = "Generating completion"
    yield streaming_service.format_thinking_step(
        step_id=gen_step_id,
        title="Generating completion",
        status="in_progress",
    )
    try:
        async for event in agent.astream_events(input_data, config=config, version="v2"):
            event_type = event.get("event", "")
            if event_type == "on_chat_model_stream":
                # Skip model output produced while a tool is running.
                if active_tool_depth > 0:
                    continue
                # Skip streams tagged as internal.
                if "surfsense:internal" in event.get("tags", []):
                    continue
                chunk = event.get("data", {}).get("chunk")
                if chunk and hasattr(chunk, "content"):
                    content = chunk.content
                    # NOTE(review): chunks whose ``content`` is not a plain
                    # ``str`` (for example a list of content blocks) are
                    # dropped here — confirm that is intended for every
                    # provider this agent is used with.
                    if content and isinstance(content, str):
                        if current_text_id is None:
                            # First text of a new part: close the active
                            # thinking step, then open the text part.
                            step_event = complete_current_step()
                            if step_event:
                                yield step_event
                            current_text_id = streaming_service.generate_text_id()
                            yield streaming_service.format_text_start(current_text_id)
                        yield streaming_service.format_text_delta(current_text_id, content)
            elif event_type == "on_tool_start":
                active_tool_depth += 1
                tool_name = event.get("name", "unknown_tool")
                run_id = event.get("run_id", "")
                tool_input = event.get("data", {}).get("input", {})
                # Close any open text part before showing the tool step.
                if current_text_id is not None:
                    yield streaming_service.format_text_end(current_text_id)
                    current_text_id = None
                step_event = complete_current_step()
                if step_event:
                    yield step_event
                tool_step_id = next_thinking_step_id()
                tool_step_ids[run_id] = tool_step_id
                last_active_step_id = tool_step_id
                title, items = _describe_tool_call(tool_name, tool_input)
                step_titles[tool_step_id] = title
                yield streaming_service.format_thinking_step(
                    step_id=tool_step_id,
                    title=title,
                    status="in_progress",
                    items=items,
                )
            elif event_type == "on_tool_end":
                # Clamp at zero in case an end event arrives without a start.
                active_tool_depth = max(0, active_tool_depth - 1)
                run_id = event.get("run_id", "")
                step_id = tool_step_ids.pop(run_id, None)
                if step_id and step_id not in completed_step_ids:
                    completed_step_ids.add(step_id)
                    title = step_titles.get(step_id, "Done")
                    yield streaming_service.format_thinking_step(
                        step_id=step_id,
                        title=title,
                        status="complete",
                    )
                    if last_active_step_id == step_id:
                        last_active_step_id = None
        # Normal completion: close the open text part and step, then finish.
        if current_text_id is not None:
            yield streaming_service.format_text_end(current_text_id)
        step_event = complete_current_step()
        if step_event:
            yield step_event
        yield streaming_service.format_finish()
        yield streaming_service.format_done()
    except Exception as e:
        # NOTE(review): on this path the active thinking step is not marked
        # complete and no finish event is sent — confirm the client clears
        # in-progress steps when it receives an error event.
        logger.error(f"Autocomplete agent streaming error: {e}", exc_info=True)
        if current_text_id is not None:
            yield streaming_service.format_text_end(current_text_id)
        yield streaming_service.format_error("Autocomplete failed. Please try again.")
        yield streaming_service.format_done()
 def _describe_tool_call(tool_name: str, tool_input: Any) -> tuple[str, list[str]]:
    """Return a human-readable ``(title, items)`` pair for a tool-call step.

    Args:
        tool_name: Name of the tool being invoked (``ls``, ``read_file``,
            ``write_file``, ``edit_file``, ``glob``, ``grep`` or anything else).
        tool_input: Raw tool arguments. Anything that is not a ``dict`` is
            treated as having no arguments.

    Returns:
        The step title and a list of display strings. Unknown tools yield
        ``("Using <tool_name>", [])``.

    Argument values are produced by the model and may be missing, ``None`` or
    non-string. They are coerced to text so that building the description can
    never raise and abort the surrounding stream.
    """
    args = tool_input if isinstance(tool_input, dict) else {}
    def text_arg(key: str, default: str = "") -> str:
        # Missing or null arguments fall back to the default; anything else is
        # stringified so len()/slicing below cannot fail on odd tool input.
        value = args.get(key)
        return default if value is None else str(value)
    def tail(path: str) -> str:
        # Keep the end of long paths (where the file name is) and mark the cut.
        return path if len(path) <= 80 else "…" + path[-77:]
    file_titles = {
        "read_file": "Reading file",
        "write_file": "Writing file",
        "edit_file": "Editing file",
    }
    if tool_name in file_titles:
        return file_titles[tool_name], [tail(text_arg("file_path"))]
    if tool_name == "ls":
        return "Listing files", [text_arg("path", "/")]
    if tool_name == "glob":
        pattern = text_arg("pattern")
        base = text_arg("path", "/")
        return "Searching files", [f"{pattern} in {base}"]
    if tool_name == "grep":
        pattern = text_arg("pattern")
        path = text_arg("path")
        shown = pattern[:60] + ("…" if len(pattern) > 60 else "")
        return "Searching content", [f'"{shown}"' + (f" in {path}" if path else "")]
    return f"Using {tool_name}", []
--- a/surfsense_backend/app/services/vision_autocomplete_service.py
+++ b/surfsense_backend/app/services/vision_autocomplete_service.py
@ -1,139 +1,40 @@
 """Vision autocomplete service — agent-based with scoped filesystem.
 Optimized pipeline:
 1. Start the SSE stream immediately so the UI shows progress.
 2. Derive a KB search query from window_title (no separate LLM call).
 3. Run KB filesystem pre-computation and agent graph compilation in PARALLEL.
 4. Inject pre-computed KB files as initial state and stream the agent.
 """
 import logging
 from typing import AsyncGenerator
-from langchain_core.messages import HumanMessage, SystemMessage
+from langchain_core.messages import HumanMessage
 from sqlalchemy.ext.asyncio import AsyncSession
-from app.retriever.chunks_hybrid_search import ChucksHybridSearchRetriever
+from app.agents.autocomplete import create_autocomplete_agent, stream_autocomplete_agent
 from app.services.llm_service import get_vision_llm
 from app.services.new_streaming_service import VercelStreamingService
 logger = logging.getLogger(__name__)
-KB_TOP_K = 5
+PREP_STEP_ID = "autocomplete-prep"
 KB_MAX_CHARS = 4000
 EXTRACT_QUERY_PROMPT = """Look at this screenshot and describe in 1-2 short sentences what the user is working on and what topic they need to write about. Be specific about the subject matter. Output ONLY the description, nothing else."""
 EXTRACT_QUERY_PROMPT_WITH_APP = """The user is currently in the application "{app_name}" with the window titled "{window_title}".
 Look at this screenshot and describe in 1-2 short sentences what the user is working on and what topic they need to write about. Be specific about the subject matter. Output ONLY the description, nothing else."""
 VISION_SYSTEM_PROMPT = """You are a smart writing assistant that analyzes the user's screen to draft or complete text.
 You will receive a screenshot of the user's screen. Your job:
 1. Analyze the ENTIRE screenshot to understand what the user is working on (email thread, chat conversation, document, code editor, form, etc.).
 2. Identify the text area where the user will type.
 3. Based on the full visual context, generate the text the user most likely wants to write.
 Key behavior:
 - If the text area is EMPTY, draft a full response or message based on what you see on screen (e.g., reply to an email, respond to a chat message, continue a document).
 - If the text area already has text, continue it naturally.
 Rules:
 - Output ONLY the text to be inserted. No quotes, no explanations, no meta-commentary.
 - Be concise but complete — a full thought, not a fragment.
 - Match the tone and formality of the surrounding context.
 - If the screen shows code, write code. If it shows a casual chat, be casual. If it shows a formal email, be formal.
 - Do NOT describe the screenshot or explain your reasoning.
 - If you cannot determine what to write, output nothing."""
 APP_CONTEXT_BLOCK = """
 The user is currently working in "{app_name}" (window: "{window_title}"). Use this to understand the type of application and adapt your tone and format accordingly."""
 KB_CONTEXT_BLOCK = """
 You also have access to the user's knowledge base documents below. Use them to write more accurate, informed, and contextually relevant text. Do NOT cite or reference the documents explicitly — just let the knowledge inform your writing naturally.
 <knowledge_base>
 {kb_context}
 </knowledge_base>"""
-def _build_system_prompt(app_name: str, window_title: str, kb_context: str) -> str:
+def _derive_kb_query(app_name: str, window_title: str) -> str:
-    """Assemble the system prompt from optional context blocks."""
+    parts = [p for p in (window_title, app_name) if p]
-    prompt = VISION_SYSTEM_PROMPT
+    return " ".join(parts)
    if app_name:
        prompt += APP_CONTEXT_BLOCK.format(app_name=app_name, window_title=window_title)
    if kb_context:
        prompt += KB_CONTEXT_BLOCK.format(kb_context=kb_context)
    return prompt
 def _is_vision_unsupported_error(e: Exception) -> bool:
    """Report whether ``e`` looks like a "model cannot take images" failure.

    The check is a case-insensitive substring match on ``str(e)``.
    """
    lowered = str(e).lower()
    markers = ("content must be a string", "does not support image")
    return any(marker in lowered for marker in markers)
-async def _extract_query_from_screenshot(
+# ---------------------------------------------------------------------------
-    llm, screenshot_data_url: str,
+# Main entry point
-    app_name: str = "", window_title: str = "",
+# ---------------------------------------------------------------------------
 ) -> str | None:
    """Ask the Vision LLM to describe what the user is working on.
    Raises vision-unsupported errors so the caller can return a
    friendly message immediately instead of retrying with astream.
    """
    if app_name:
        prompt_text = EXTRACT_QUERY_PROMPT_WITH_APP.format(
            app_name=app_name, window_title=window_title,
        )
    else:
        prompt_text = EXTRACT_QUERY_PROMPT
    try:
        response = await llm.ainvoke([
            HumanMessage(content=[
                {"type": "text", "text": prompt_text},
                {"type": "image_url", "image_url": {"url": screenshot_data_url}},
            ]),
        ])
        query = response.content.strip() if hasattr(response, "content") else ""
        return query if query else None
    except Exception as e:
        if _is_vision_unsupported_error(e):
            raise
        logger.warning(f"Failed to extract query from screenshot: {e}")
        return None
 async def _search_knowledge_base(
    session: AsyncSession, search_space_id: int, query: str
 ) -> str:
    """Search the KB and return formatted context string.

    Runs a hybrid search for ``query`` (``top_k=KB_TOP_K``) and concatenates
    non-empty chunk contents as ``[title]\\ncontent`` entries joined by
    ``---`` separators, stopping once ``KB_MAX_CHARS`` would be exceeded.
    Returns an empty string when nothing is found or when the search fails.
    """
    try:
        retriever = ChucksHybridSearchRetriever(session)
        results = await retriever.hybrid_search(
            query_text=query,
            top_k=KB_TOP_K,
            search_space_id=search_space_id,
        )
        if not results:
            return ""
        parts: list[str] = []
        # Running total of entry lengths; the join separators are not counted.
        char_count = 0
        for doc in results:
            title = doc.get("document", {}).get("title", "Untitled")
            for chunk in doc.get("chunks", []):
                content = chunk.get("content", "").strip()
                if not content:
                    continue
                entry = f"[{title}]\n{content}"
                # An entry that would overflow the budget ends this document's
                # chunk loop only; later documents are still considered unless
                # the check after the loop fires.
                if char_count + len(entry) > KB_MAX_CHARS:
                    break
                parts.append(entry)
                char_count += len(entry)
            if char_count >= KB_MAX_CHARS:
                break
        return "\n\n---\n\n".join(parts)
    except Exception as e:
        # Best effort: a failed search degrades to "no KB context".
        logger.warning(f"KB search failed, proceeding without context: {e}")
        return ""
 async def stream_vision_autocomplete(
@ -144,13 +45,7 @@ async def stream_vision_autocomplete(
    app_name: str = "",
    window_title: str = "",
 ) -> AsyncGenerator[str, None]:
-    """Analyze a screenshot with the vision LLM and stream a text completion.
+    """Analyze a screenshot with a vision-LLM agent and stream a text completion."""
    Pipeline:
    1. Extract a search query from the screenshot (non-streaming)
    2. Search the knowledge base for relevant context
    3. Stream the final completion with screenshot + KB + app context
    """
    streaming = VercelStreamingService()
    vision_error_msg = (
        "The selected model does not support vision. "
@ -164,62 +59,89 @@ async def stream_vision_autocomplete(
        yield streaming.format_done()
        return
-    kb_context = ""
+    # Start SSE stream immediately so the UI has something to show
    yield streaming.format_message_start()
    kb_query = _derive_kb_query(app_name, window_title)
    # Show a preparation step while KB search + agent compile run
    yield streaming.format_thinking_step(
        step_id=PREP_STEP_ID,
        title="Searching knowledge base",
        status="in_progress",
        items=[kb_query] if kb_query else [],
    )
    try:
-        query = await _extract_query_from_screenshot(
+        agent, kb = await create_autocomplete_agent(
-            llm, screenshot_data_url, app_name=app_name, window_title=window_title,
+            llm,
            search_space_id=search_space_id,
            kb_query=kb_query,
            app_name=app_name,
            window_title=window_title,
        )
    except Exception as e:
-        logger.warning(f"Vision autocomplete: selected model does not support vision: {e}")
+        if _is_vision_unsupported_error(e):
-        yield streaming.format_message_start()
+            logger.warning("Vision autocomplete: model does not support vision: %s", e)
-        yield streaming.format_error(vision_error_msg)
+            yield streaming.format_error(vision_error_msg)
            yield streaming.format_done()
            return
        logger.error("Failed to create autocomplete agent: %s", e, exc_info=True)
        yield streaming.format_error("Autocomplete failed. Please try again.")
        yield streaming.format_done()
        return
-    if query:
+    has_kb = kb.has_documents
-        kb_context = await _search_knowledge_base(session, search_space_id, query)
+    doc_count = len(kb.files) if has_kb else 0  # type: ignore[arg-type]
-    system_prompt = _build_system_prompt(app_name, window_title, kb_context)
+    yield streaming.format_thinking_step(
        step_id=PREP_STEP_ID,
        title="Searching knowledge base",
        status="complete",
        items=[f"Found {doc_count} document{'s' if doc_count != 1 else ''}"] if kb_query else ["Skipped"],
    )
-    messages = [
+    # Build agent input with pre-computed KB as initial state
-        SystemMessage(content=system_prompt),
+    if has_kb:
-        HumanMessage(content=[
+        instruction = (
-            {
+            "Analyze this screenshot, then explore the knowledge base documents "
-                "type": "text",
+            "listed above — read the chunk index of any document whose title "
-                "text": "Analyze this screenshot. Understand the full context of what the user is working on, then generate the text they most likely want to write in the active text area.",
+            "looks relevant and check matched chunks for useful facts. "
-            },
+            "Finally, generate a concise autocomplete for the active text area, "
-            {
+            "enhanced with any relevant KB information you found."
-                "type": "image_url",
+        )
-                "image_url": {"url": screenshot_data_url},
+    else:
-            },
+        instruction = (
-        ]),
+            "Analyze this screenshot and generate a concise autocomplete "
-    ]
+            "for the active text area based on what you see."
        )
-    text_started = False
+    user_message = HumanMessage(content=[
-    text_id = ""
+        {"type": "text", "text": instruction},
        {"type": "image_url", "image_url": {"url": screenshot_data_url}},
    ])
    input_data: dict = {"messages": [user_message]}
    if has_kb:
        input_data["files"] = kb.files
        input_data["messages"] = [kb.ls_ai_msg, kb.ls_tool_msg, user_message]
        logger.info("Autocomplete: injected %d KB files into agent initial state", doc_count)
    else:
        logger.info("Autocomplete: no KB documents found, proceeding with screenshot only")
    # Stream the agent (message_start already sent above)
    try:
-        yield streaming.format_message_start()
+        async for sse in stream_autocomplete_agent(
-        text_id = streaming.generate_text_id()
+            agent, input_data, streaming, emit_message_start=False,
-        yield streaming.format_text_start(text_id)
+        ):
-        text_started = True
+            yield sse
        async for chunk in llm.astream(messages):
            token = chunk.content if hasattr(chunk, "content") else str(chunk)
            if token:
                yield streaming.format_text_delta(text_id, token)
        yield streaming.format_text_end(text_id)
        yield streaming.format_finish()
        yield streaming.format_done()
    except Exception as e:
        if text_started:
            yield streaming.format_text_end(text_id)
        if _is_vision_unsupported_error(e):
-            logger.warning(f"Vision autocomplete: selected model does not support vision: {e}")
+            logger.warning("Vision autocomplete: model does not support vision: %s", e)
            yield streaming.format_error(vision_error_msg)
            yield streaming.format_done()
        else:
-            logger.error(f"Vision autocomplete streaming error: {e}", exc_info=True)
+            logger.error("Vision autocomplete streaming error: %s", e, exc_info=True)
            yield streaming.format_error("Autocomplete failed. Please try again.")
-        yield streaming.format_done()
+            yield streaming.format_done()
--- a/surfsense_web/app/desktop/suggestion/page.tsx
+++ b/surfsense_web/app/desktop/suggestion/page.tsx
@ -10,7 +10,18 @@ type SSEEvent =
 	| { type: "text-end"; id: string }
 	| { type: "start"; messageId: string }
 	| { type: "finish" }
-	| { type: "error"; errorText: string };
+	| { type: "error"; errorText: string }
 	| {
 			type: "data-thinking-step";
 			data: { id: string; title: string; status: string; items: string[] };
 	  };
 /**
  * One agent progress step as stored in the `steps` state of SuggestionPage.
  * Has the same fields as the `data` payload of a "data-thinking-step" SSE event.
  */
 interface AgentStep {
 	// Step identity: an incoming event whose id matches a stored step replaces it,
 	// otherwise the step is appended. Also used as the React list key.
 	id: string;
 	// Label rendered next to the step icon.
 	title: string;
 	// "complete" renders the check mark; every other value renders the spinner.
 	status: string;
 	// Detail strings; only the first entry is rendered, after the title.
 	items: string[];
 }
 function friendlyError(raw: string | number): string {
 	if (typeof raw === "number") {
@ -34,11 +45,24 @@ function friendlyError(raw: string | number): string {
 const AUTO_DISMISS_MS = 3000;
 /**
  * Status glyph for a single agent step.
  *
  * Renders the circled check mark SVG when `status` is exactly "complete";
  * for any other status it renders the CSS-driven `step-spinner` element.
  */
 function StepIcon({ status }: { status: string }) {
 	const isDone = status === "complete";
 	// Guard clause: anything that is not finished is shown as still running.
 	if (!isDone) {
 		return <span className="step-spinner" />;
 	}
 	return (
 		<svg className="step-icon step-icon-done" viewBox="0 0 16 16" fill="none">
 			<circle cx="8" cy="8" r="7" stroke="#4ade80" strokeWidth="1.5" />
 			<path
 				d="M5 8.5l2 2 4-4.5"
 				stroke="#4ade80"
 				strokeWidth="1.5"
 				strokeLinecap="round"
 				strokeLinejoin="round"
 			/>
 		</svg>
 	);
 }
 export default function SuggestionPage() {
 	const api = useElectronAPI();
 	const [suggestion, setSuggestion] = useState("");
 	const [isLoading, setIsLoading] = useState(true);
 	const [error, setError] = useState<string | null>(null);
 	const [steps, setSteps] = useState<AgentStep[]>([]);
 	const abortRef = useRef<AbortController | null>(null);
 	const isDesktop = !!api?.onAutocompleteContext;
@ -66,6 +90,7 @@ export default function SuggestionPage() {
 			setIsLoading(true);
 			setSuggestion("");
 			setError(null);
 			setSteps([]);
 			let token = getBearerToken();
 			if (!token) {
@ -137,6 +162,17 @@ export default function SuggestionPage() {
 									setSuggestion((prev) => prev + parsed.delta);
 								} else if (parsed.type === "error") {
 									setError(friendlyError(parsed.errorText));
 								} else if (parsed.type === "data-thinking-step") {
 									const { id, title, status, items } = parsed.data;
 									setSteps((prev) => {
 										const existing = prev.findIndex((s) => s.id === id);
 										if (existing >= 0) {
 											const updated = [...prev];
 											updated[existing] = { id, title, status, items };
 											return updated;
 										}
 										return [...prev, { id, title, status, items }];
 									});
 								}
 							} catch {
 								continue;
@ -185,13 +221,33 @@ export default function SuggestionPage() {
 		);
 	}
-	if (isLoading && !suggestion) {
+	const showLoading = isLoading && !suggestion;
 	if (showLoading) {
 		return (
 			<div className="suggestion-tooltip">
-				<div className="suggestion-loading">
+				<div className="agent-activity">
-					<span className="suggestion-dot" />
+					{steps.length === 0 && (
-					<span className="suggestion-dot" />
+						<div className="activity-initial">
-					<span className="suggestion-dot" />
+							<span className="step-spinner" />
 							<span className="activity-label">Preparing…</span>
 						</div>
 					)}
 					{steps.length > 0 && (
 						<div className="activity-steps">
 							{steps.map((step) => (
 								<div key={step.id} className="activity-step">
 									<StepIcon status={step.status} />
 									<span className="step-label">
 										{step.title}
 										{step.items.length > 0 && (
 											<span className="step-detail"> · {step.items[0]}</span>
 										)}
 									</span>
 								</div>
 							))}
 						</div>
 					)}
 				</div>
 			</div>
 		);
--- a/surfsense_web/app/desktop/suggestion/suggestion.css
+++ b/surfsense_web/app/desktop/suggestion/suggestion.css
@ -19,13 +19,21 @@ body:has(.suggestion-body) {
 }
 /* Outer tooltip card: dark, bordered, rounded box capped at 400px wide.
    Laid out as a column flex container with overflow clipped at the card edge. */
 .suggestion-tooltip {
  box-sizing: border-box;
  background: #1e1e1e;
  border: 1px solid #3c3c3c;
  border-radius: 8px;
  padding: 8px 12px;
  margin: 4px;
  max-width: 400px;
  /* MAX_HEIGHT in suggestion-window.ts is 400px. Subtract 8px for margin
     (4px * 2) so the tooltip + margin fits within the Electron window.
     box-sizing: border-box ensures padding + border are included. */
  max-height: 392px;
  box-shadow: 0 4px 16px rgba(0, 0, 0, 0.5);
  display: flex;
  flex-direction: column;
  overflow: hidden;
 }
 .suggestion-text {
@ -35,6 +43,26 @@ body:has(.suggestion-body) {
  margin: 0 0 6px 0;
  word-wrap: break-word;
  white-space: pre-wrap;
  overflow-y: auto;
  flex: 1 1 auto;
  min-height: 0;
 }
 /* Scrollbar styling for the suggestion text via WebKit pseudo-elements:
    5px wide, transparent track, grey rounded thumb that lightens on hover. */
 .suggestion-text::-webkit-scrollbar {
  width: 5px;
 }
 .suggestion-text::-webkit-scrollbar-track {
  background: transparent;
 }
 .suggestion-text::-webkit-scrollbar-thumb {
  background: #555;
  border-radius: 3px;
 }
 .suggestion-text::-webkit-scrollbar-thumb:hover {
  background: #777;
 }
 .suggestion-actions {
@ -43,6 +71,7 @@ body:has(.suggestion-body) {
  gap: 4px;
  border-top: 1px solid #2a2a2a;
  padding-top: 6px;
  flex-shrink: 0;
 }
 .suggestion-btn {
@ -86,36 +115,77 @@ body:has(.suggestion-body) {
  font-size: 12px;
 }
-.suggestion-loading {
+/* --- Agent activity indicator --- */
 .agent-activity {
  display: flex;
-  gap: 5px;
+  flex-direction: column;
  gap: 4px;
  overflow-y: auto;
  max-height: 340px;
 }
 /* Placeholder row rendered while no agent step has been received yet:
    a spinner plus the "Preparing…" label, centered on both axes. */
 .activity-initial {
  display: flex;
  align-items: center;
  gap: 8px;
  padding: 2px 0;
  justify-content: center;
 }
-.suggestion-dot {
+.activity-label {
-  width: 4px;
+  color: #a1a1aa;
-  height: 4px;
+  font-size: 12px;
  white-space: nowrap;
  overflow: hidden;
  text-overflow: ellipsis;
 }
 /* Vertical stack of step rows; rendered once at least one step exists. */
 .activity-steps {
  display: flex;
  flex-direction: column;
  gap: 3px;
 }
 /* One step row: status icon followed by its label, vertically centered.
    min-height keeps every row at least 18px tall. */
 .activity-step {
  display: flex;
  align-items: center;
  gap: 6px;
  min-height: 18px;
 }
 /* Step title text: kept on a single line and truncated with an ellipsis
    when it does not fit. */
 .step-label {
  color: #d4d4d4;
  font-size: 12px;
  white-space: nowrap;
  overflow: hidden;
  text-overflow: ellipsis;
 }
 /* Secondary detail shown after the step title (the first entry of the
    step's items): dimmer and one pixel smaller than the label. */
 .step-detail {
  color: #71717a;
  font-size: 11px;
 }
 /* Spinner (in_progress) */
 .step-spinner {
  width: 14px;
  height: 14px;
  flex-shrink: 0;
  border: 1.5px solid #3f3f46;
  border-top-color: #a78bfa;
  border-radius: 50%;
-  background: #666;
+  animation: step-spin 0.7s linear infinite;
  animation: suggestion-pulse 1.2s infinite ease-in-out;
 }
-.suggestion-dot:nth-child(2) {
+/* Checkmark icon (complete) */
-  animation-delay: 0.15s;
+.step-icon {
  width: 14px;
  height: 14px;
  flex-shrink: 0;
 }
-.suggestion-dot:nth-child(3) {
+@keyframes step-spin {
-  animation-delay: 0.3s;
+  to {
-}
+    transform: rotate(360deg);
@keyframes suggestion-pulse {
  0%, 80%, 100% {
    opacity: 0.3;
    transform: scale(0.8);
  }
  40% {
    opacity: 1;
    transform: scale(1.1);
  }
 }
--- a/surfsense_web/components/assistant-ui/thread.tsx
+++ b/surfsense_web/components/assistant-ui/thread.tsx
@ -92,15 +92,7 @@ import { useMediaQuery } from "@/hooks/use-media-query";
 import { useElectronAPI } from "@/hooks/use-platform";
 import { cn } from "@/lib/utils";
-/** Placeholder texts that cycle in new chats when input is empty */
+const COMPOSER_PLACEHOLDER = "Ask anything · Type / for prompts · Type @ to mention docs";
 // Example prompts for the composer input. Composer indexes this list with
 // `placeholderIndex` (advanced modulo the list length) while the thread is
 // empty, and falls back to the first entry otherwise.
 const CYCLING_PLACEHOLDERS = [
 	"Ask SurfSense anything or @mention docs",
 	"Generate a podcast from my vacation ideas in Notion",
 	"Sum up last week's meeting notes from Drive in a bulleted list",
 	"Give me a brief overview of the most urgent tickets in Jira and Linear",
 	"Briefly, what are today's top ten important emails and calendar events?",
 	"Check if this week's Slack messages reference any GitHub issues",
 ];
 export const Thread: FC = () => {
 	return <ThreadContent />;
@ -380,29 +372,7 @@ const Composer: FC = () => {
 	const isThreadEmpty = useAuiState(({ thread }) => thread.isEmpty);
 	const isThreadRunning = useAuiState(({ thread }) => thread.isRunning);
-	// Cycling placeholder state - only cycles in new chats
+	const currentPlaceholder = COMPOSER_PLACEHOLDER;
 	const [placeholderIndex, setPlaceholderIndex] = useState(0);
 	// Cycle through placeholders every 6 seconds when thread is empty (new chat)
 	useEffect(() => {
 		// Only cycle when thread is empty (new chat)
 		if (!isThreadEmpty) {
 			// Reset to first placeholder when chat becomes active
 			setPlaceholderIndex(0);
 			return;
 		}
 		const intervalId = setInterval(() => {
 			setPlaceholderIndex((prev) => (prev + 1) % CYCLING_PLACEHOLDERS.length);
 		}, 6000);
 		return () => clearInterval(intervalId);
 	}, [isThreadEmpty]);
 	// Compute current placeholder - only cycle in new chats
 	const currentPlaceholder = isThreadEmpty
 		? CYCLING_PLACEHOLDERS[placeholderIndex]
 		: CYCLING_PLACEHOLDERS[0];
 	// Live collaboration state
 	const { data: currentUser } = useAtomValue(currentUserAtom);