feat: return 3 suggestion options from vision autocomplete agent

This commit is contained in:
CREDO23 2026-04-07 17:38:39 +02:00
parent 1a5d40e8f3
commit 55845d68ef

View file

@@ -14,7 +14,9 @@ LLM call — the window title is used directly as the KB search query.
from __future__ import annotations
import asyncio
import json
import logging
import re
import uuid
from collections.abc import AsyncGenerator
from typing import Any
@@ -61,13 +63,21 @@ Key behavior:
- If the text area already has text, continue it naturally — typically just a sentence or two.
Rules:
- Output ONLY the text to be inserted. No quotes, no explanations, no meta-commentary.
- Be CONCISE. Prefer a single paragraph or a few sentences. Autocomplete is a quick assist, not a full draft.
- Match the tone and formality of the surrounding context.
- If the screen shows code, write code. If it shows a casual chat, be casual. If it shows a formal email, be formal.
- Do NOT describe the screenshot or explain your reasoning.
- Do NOT cite or reference documents explicitly — just let the knowledge inform your writing naturally.
- If you cannot determine what to write, output nothing.
- If you cannot determine what to write, output an empty JSON array: []
## Output Format
You MUST provide exactly 3 different suggestion options. Each should be a distinct, plausible completion — vary the tone, detail level, or angle.
Return your suggestions as a JSON array of exactly 3 strings. Output ONLY the JSON array, nothing else — no markdown fences, no explanation, no commentary.
Example format:
["First suggestion text here.", "Second suggestion — a different take.", "Third option with another approach."]
## Filesystem Tools `ls`, `read_file`, `write_file`, `edit_file`, `glob`, `grep`
@@ -264,6 +274,50 @@ async def create_autocomplete_agent(
return agent, kb
# ---------------------------------------------------------------------------
# JSON suggestion parsing (robust fallback)
# ---------------------------------------------------------------------------
def _parse_suggestions(raw: str) -> list[str]:
"""Extract a list of suggestion strings from the agent's output.
Tries, in order:
1. Direct ``json.loads``
2. Extract content between ```json ... ``` fences
3. Find the first ``[`` ``]`` span
Falls back to wrapping the raw text as a single suggestion.
"""
text = raw.strip()
if not text:
return []
for candidate in _json_candidates(text):
try:
parsed = json.loads(candidate)
if isinstance(parsed, list) and all(isinstance(s, str) for s in parsed):
return [s for s in parsed if s.strip()]
except (json.JSONDecodeError, ValueError):
continue
return [text]
def _json_candidates(text: str) -> list[str]:
"""Yield candidate JSON strings from raw text."""
candidates = [text]
fence = re.search(r"```(?:json)?\s*\n?(.*?)```", text, re.DOTALL)
if fence:
candidates.append(fence.group(1).strip())
bracket = re.search(r"\[.*]", text, re.DOTALL)
if bracket:
candidates.append(bracket.group(0))
return candidates
# ---------------------------------------------------------------------------
# Streaming helper
# ---------------------------------------------------------------------------
@@ -285,7 +339,7 @@ async def stream_autocomplete_agent(
thread_id = uuid.uuid4().hex
config = {"configurable": {"thread_id": thread_id}}
current_text_id: str | None = None
text_buffer: list[str] = []
active_tool_depth = 0
thinking_step_counter = 0
tool_step_ids: dict[str, str] = {}
@@ -315,14 +369,12 @@ async def stream_autocomplete_agent(
if emit_message_start:
yield streaming_service.format_message_start()
# Emit an initial "Generating completion" step so the UI immediately
# shows activity once the agent starts its first LLM call.
gen_step_id = next_thinking_step_id()
last_active_step_id = gen_step_id
step_titles[gen_step_id] = "Generating completion"
step_titles[gen_step_id] = "Generating suggestions"
yield streaming_service.format_thinking_step(
step_id=gen_step_id,
title="Generating completion",
title="Generating suggestions",
status="in_progress",
)
@@ -341,15 +393,7 @@ async def stream_autocomplete_agent(
if chunk and hasattr(chunk, "content"):
content = chunk.content
if content and isinstance(content, str):
if current_text_id is None:
step_event = complete_current_step()
if step_event:
yield step_event
current_text_id = streaming_service.generate_text_id()
yield streaming_service.format_text_start(current_text_id)
yield streaming_service.format_text_delta(
current_text_id, content
)
text_buffer.append(content)
elif event_type == "on_tool_start":
active_tool_depth += 1
@@ -357,10 +401,6 @@ async def stream_autocomplete_agent(
run_id = event.get("run_id", "")
tool_input = event.get("data", {}).get("input", {})
if current_text_id is not None:
yield streaming_service.format_text_end(current_text_id)
current_text_id = None
step_event = complete_current_step()
if step_event:
yield step_event
@@ -393,19 +433,22 @@ async def stream_autocomplete_agent(
if last_active_step_id == step_id:
last_active_step_id = None
if current_text_id is not None:
yield streaming_service.format_text_end(current_text_id)
step_event = complete_current_step()
if step_event:
yield step_event
raw_text = "".join(text_buffer)
suggestions = _parse_suggestions(raw_text)
yield streaming_service.format_data(
"suggestions", {"options": suggestions}
)
yield streaming_service.format_finish()
yield streaming_service.format_done()
except Exception as e:
logger.error(f"Autocomplete agent streaming error: {e}", exc_info=True)
if current_text_id is not None:
yield streaming_service.format_text_end(current_text_id)
yield streaming_service.format_error("Autocomplete failed. Please try again.")
yield streaming_service.format_done()