feat: return 3 suggestion options from vision autocomplete agent

This commit is contained in:
CREDO23 2026-04-07 17:38:39 +02:00
parent 1a5d40e8f3
commit 55845d68ef

View file

@ -14,7 +14,9 @@ LLM call — the window title is used directly as the KB search query.
from __future__ import annotations from __future__ import annotations
import asyncio import asyncio
import json
import logging import logging
import re
import uuid import uuid
from collections.abc import AsyncGenerator from collections.abc import AsyncGenerator
from typing import Any from typing import Any
@ -61,13 +63,21 @@ Key behavior:
- If the text area already has text, continue it naturally typically just a sentence or two. - If the text area already has text, continue it naturally typically just a sentence or two.
Rules: Rules:
- Output ONLY the text to be inserted. No quotes, no explanations, no meta-commentary.
- Be CONCISE. Prefer a single paragraph or a few sentences. Autocomplete is a quick assist, not a full draft. - Be CONCISE. Prefer a single paragraph or a few sentences. Autocomplete is a quick assist, not a full draft.
- Match the tone and formality of the surrounding context. - Match the tone and formality of the surrounding context.
- If the screen shows code, write code. If it shows a casual chat, be casual. If it shows a formal email, be formal. - If the screen shows code, write code. If it shows a casual chat, be casual. If it shows a formal email, be formal.
- Do NOT describe the screenshot or explain your reasoning. - Do NOT describe the screenshot or explain your reasoning.
- Do NOT cite or reference documents explicitly just let the knowledge inform your writing naturally. - Do NOT cite or reference documents explicitly just let the knowledge inform your writing naturally.
- If you cannot determine what to write, output nothing. - If you cannot determine what to write, output an empty JSON array: []
## Output Format
You MUST provide exactly 3 different suggestion options. Each should be a distinct, plausible completion vary the tone, detail level, or angle.
Return your suggestions as a JSON array of exactly 3 strings. Output ONLY the JSON array, nothing else no markdown fences, no explanation, no commentary.
Example format:
["First suggestion text here.", "Second suggestion — a different take.", "Third option with another approach."]
## Filesystem Tools `ls`, `read_file`, `write_file`, `edit_file`, `glob`, `grep` ## Filesystem Tools `ls`, `read_file`, `write_file`, `edit_file`, `glob`, `grep`
@ -264,6 +274,50 @@ async def create_autocomplete_agent(
return agent, kb return agent, kb
# ---------------------------------------------------------------------------
# JSON suggestion parsing (robust fallback)
# ---------------------------------------------------------------------------
def _parse_suggestions(raw: str) -> list[str]:
    """Turn the agent's raw output into a list of suggestion strings.

    The output is stripped, then every candidate produced by
    ``_json_candidates`` is decoded in order. The first candidate that
    decodes to a JSON array containing only strings wins; entries that are
    empty or whitespace-only are dropped from it.

    Args:
        raw: The concatenated text emitted by the agent.

    Returns:
        ``[]`` when ``raw`` is empty or whitespace-only, the filtered array
        when a candidate decodes to a list of strings, and otherwise a
        single-element list holding the stripped raw text.
    """
    stripped = raw.strip()
    if stripped == "":
        return []

    for blob in _json_candidates(stripped):
        try:
            decoded = json.loads(blob)
        except ValueError:
            # json.JSONDecodeError is a ValueError subclass, so this covers both.
            continue

        if not isinstance(decoded, list):
            continue
        if any(not isinstance(item, str) for item in decoded):
            continue

        return [item for item in decoded if item.strip()]

    # Nothing decoded to a list of strings: offer the text itself.
    return [stripped]
def _json_candidates(text: str) -> list[str]:
"""Yield candidate JSON strings from raw text."""
candidates = [text]
fence = re.search(r"```(?:json)?\s*\n?(.*?)```", text, re.DOTALL)
if fence:
candidates.append(fence.group(1).strip())
bracket = re.search(r"\[.*]", text, re.DOTALL)
if bracket:
candidates.append(bracket.group(0))
return candidates
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Streaming helper # Streaming helper
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
@ -285,7 +339,7 @@ async def stream_autocomplete_agent(
thread_id = uuid.uuid4().hex thread_id = uuid.uuid4().hex
config = {"configurable": {"thread_id": thread_id}} config = {"configurable": {"thread_id": thread_id}}
current_text_id: str | None = None text_buffer: list[str] = []
active_tool_depth = 0 active_tool_depth = 0
thinking_step_counter = 0 thinking_step_counter = 0
tool_step_ids: dict[str, str] = {} tool_step_ids: dict[str, str] = {}
@ -315,14 +369,12 @@ async def stream_autocomplete_agent(
if emit_message_start: if emit_message_start:
yield streaming_service.format_message_start() yield streaming_service.format_message_start()
# Emit an initial "Generating completion" step so the UI immediately
# shows activity once the agent starts its first LLM call.
gen_step_id = next_thinking_step_id() gen_step_id = next_thinking_step_id()
last_active_step_id = gen_step_id last_active_step_id = gen_step_id
step_titles[gen_step_id] = "Generating completion" step_titles[gen_step_id] = "Generating suggestions"
yield streaming_service.format_thinking_step( yield streaming_service.format_thinking_step(
step_id=gen_step_id, step_id=gen_step_id,
title="Generating completion", title="Generating suggestions",
status="in_progress", status="in_progress",
) )
@ -341,15 +393,7 @@ async def stream_autocomplete_agent(
if chunk and hasattr(chunk, "content"): if chunk and hasattr(chunk, "content"):
content = chunk.content content = chunk.content
if content and isinstance(content, str): if content and isinstance(content, str):
if current_text_id is None: text_buffer.append(content)
step_event = complete_current_step()
if step_event:
yield step_event
current_text_id = streaming_service.generate_text_id()
yield streaming_service.format_text_start(current_text_id)
yield streaming_service.format_text_delta(
current_text_id, content
)
elif event_type == "on_tool_start": elif event_type == "on_tool_start":
active_tool_depth += 1 active_tool_depth += 1
@ -357,10 +401,6 @@ async def stream_autocomplete_agent(
run_id = event.get("run_id", "") run_id = event.get("run_id", "")
tool_input = event.get("data", {}).get("input", {}) tool_input = event.get("data", {}).get("input", {})
if current_text_id is not None:
yield streaming_service.format_text_end(current_text_id)
current_text_id = None
step_event = complete_current_step() step_event = complete_current_step()
if step_event: if step_event:
yield step_event yield step_event
@ -393,19 +433,22 @@ async def stream_autocomplete_agent(
if last_active_step_id == step_id: if last_active_step_id == step_id:
last_active_step_id = None last_active_step_id = None
if current_text_id is not None:
yield streaming_service.format_text_end(current_text_id)
step_event = complete_current_step() step_event = complete_current_step()
if step_event: if step_event:
yield step_event yield step_event
raw_text = "".join(text_buffer)
suggestions = _parse_suggestions(raw_text)
yield streaming_service.format_data(
"suggestions", {"options": suggestions}
)
yield streaming_service.format_finish() yield streaming_service.format_finish()
yield streaming_service.format_done() yield streaming_service.format_done()
except Exception as e: except Exception as e:
logger.error(f"Autocomplete agent streaming error: {e}", exc_info=True) logger.error(f"Autocomplete agent streaming error: {e}", exc_info=True)
if current_text_id is not None:
yield streaming_service.format_text_end(current_text_id)
yield streaming_service.format_error("Autocomplete failed. Please try again.") yield streaming_service.format_error("Autocomplete failed. Please try again.")
yield streaming_service.format_done() yield streaming_service.format_done()