mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-06-26 21:39:43 +02:00
feat: enhance vision autocomplete service and UI feedback
- Optimized the vision autocomplete service by starting the SSE stream immediately and deriving KB search queries directly from window titles. - Refactored the service to run KB filesystem pre-computation and agent graph compilation in parallel, improving performance. - Updated the SuggestionPage component to handle new agent step data, displaying progress indicators for each step. - Enhanced the CSS for the suggestion tooltip and agent activity indicators, improving the user interface and experience.
This commit is contained in:
parent
49441233e7
commit
bb1dcd32b6
6 changed files with 686 additions and 228 deletions
11
surfsense_backend/app/agents/autocomplete/__init__.py
Normal file
11
surfsense_backend/app/agents/autocomplete/__init__.py
Normal file
|
|
@ -0,0 +1,11 @@
|
||||||
|
"""Agent-based vision autocomplete with scoped filesystem exploration."""
|
||||||
|
|
||||||
|
from app.agents.autocomplete.autocomplete_agent import (
|
||||||
|
create_autocomplete_agent,
|
||||||
|
stream_autocomplete_agent,
|
||||||
|
)
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"create_autocomplete_agent",
|
||||||
|
"stream_autocomplete_agent",
|
||||||
|
]
|
||||||
429
surfsense_backend/app/agents/autocomplete/autocomplete_agent.py
Normal file
429
surfsense_backend/app/agents/autocomplete/autocomplete_agent.py
Normal file
|
|
@ -0,0 +1,429 @@
|
||||||
|
"""Vision autocomplete agent with scoped filesystem exploration.
|
||||||
|
|
||||||
|
Converts the stateless single-shot vision autocomplete into an agent that
|
||||||
|
seeds a virtual filesystem from KB search results and lets the vision LLM
|
||||||
|
explore documents via ``ls``, ``read_file``, ``glob``, ``grep``, etc.
|
||||||
|
before generating the final completion.
|
||||||
|
|
||||||
|
Performance: KB search and agent graph compilation run in parallel so
|
||||||
|
the only sequential latency is KB-search (or agent compile, whichever is
|
||||||
|
slower) + the agent's LLM turns. There is no separate "query extraction"
|
||||||
|
LLM call — the window title is used directly as the KB search query.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import logging
|
||||||
|
import uuid
|
||||||
|
from typing import Any, AsyncGenerator
|
||||||
|
|
||||||
|
from deepagents.graph import BASE_AGENT_PROMPT
|
||||||
|
from deepagents.middleware.patch_tool_calls import PatchToolCallsMiddleware
|
||||||
|
from langchain.agents import create_agent
|
||||||
|
from langchain_anthropic.middleware import AnthropicPromptCachingMiddleware
|
||||||
|
from langchain_core.language_models import BaseChatModel
|
||||||
|
from langchain_core.messages import AIMessage, ToolMessage
|
||||||
|
|
||||||
|
from app.agents.new_chat.middleware.filesystem import SurfSenseFilesystemMiddleware
|
||||||
|
from app.agents.new_chat.middleware.knowledge_search import (
|
||||||
|
build_scoped_filesystem,
|
||||||
|
search_knowledge_base,
|
||||||
|
)
|
||||||
|
from app.services.new_streaming_service import VercelStreamingService
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
KB_TOP_K = 10
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# System prompt
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
AUTOCOMPLETE_SYSTEM_PROMPT = """You are a smart writing assistant that analyzes the user's screen to draft or complete text.
|
||||||
|
|
||||||
|
You will receive a screenshot of the user's screen. Your PRIMARY source of truth is the screenshot itself — the visual context determines what to write.
|
||||||
|
|
||||||
|
Your job:
|
||||||
|
1. Analyze the ENTIRE screenshot to understand what the user is working on (email thread, chat conversation, document, code editor, form, etc.).
|
||||||
|
2. Identify the text area where the user will type.
|
||||||
|
3. Generate the text the user most likely wants to write based on the visual context.
|
||||||
|
|
||||||
|
You also have access to the user's knowledge base documents via filesystem tools. However:
|
||||||
|
- ONLY consult the knowledge base if the screenshot clearly involves a topic where your KB documents are DIRECTLY relevant (e.g., the user is writing about a specific project/topic that matches a document title).
|
||||||
|
- Do NOT explore documents just because they exist. Most autocomplete requests can be answered purely from the screenshot.
|
||||||
|
- If you do read a document, only incorporate information that is 100% relevant to what the user is typing RIGHT NOW. Do not add extra details, background, or tangential information from the KB.
|
||||||
|
- Keep your output SHORT — autocomplete should feel like a natural continuation, not an essay.
|
||||||
|
|
||||||
|
Key behavior:
|
||||||
|
- If the text area is EMPTY, draft a concise response or message based on what you see on screen (e.g., reply to an email, respond to a chat message, continue a document).
|
||||||
|
- If the text area already has text, continue it naturally — typically just a sentence or two.
|
||||||
|
|
||||||
|
Rules:
|
||||||
|
- Output ONLY the text to be inserted. No quotes, no explanations, no meta-commentary.
|
||||||
|
- Be CONCISE. Prefer a single paragraph or a few sentences. Autocomplete is a quick assist, not a full draft.
|
||||||
|
- Match the tone and formality of the surrounding context.
|
||||||
|
- If the screen shows code, write code. If it shows a casual chat, be casual. If it shows a formal email, be formal.
|
||||||
|
- Do NOT describe the screenshot or explain your reasoning.
|
||||||
|
- Do NOT cite or reference documents explicitly — just let the knowledge inform your writing naturally.
|
||||||
|
- If you cannot determine what to write, output nothing.
|
||||||
|
|
||||||
|
## Filesystem Tools `ls`, `read_file`, `write_file`, `edit_file`, `glob`, `grep`
|
||||||
|
|
||||||
|
All file paths must start with a `/`.
|
||||||
|
- ls: list files and directories at a given path.
|
||||||
|
- read_file: read a file from the filesystem.
|
||||||
|
- write_file: create a temporary file in the session (not persisted).
|
||||||
|
- edit_file: edit a file in the session (not persisted for /documents/ files).
|
||||||
|
- glob: find files matching a pattern (e.g., "**/*.xml").
|
||||||
|
- grep: search for text within files.
|
||||||
|
|
||||||
|
## When to Use Filesystem Tools
|
||||||
|
|
||||||
|
BEFORE reaching for any tool, ask yourself: "Can I write a good completion purely from the screenshot?" If yes, just write it — do NOT explore the KB.
|
||||||
|
|
||||||
|
Only use tools when:
|
||||||
|
- The user is clearly writing about a specific topic that likely has detailed information in their KB.
|
||||||
|
- You need a specific fact, name, number, or reference that the screenshot doesn't provide.
|
||||||
|
|
||||||
|
When you do use tools, be surgical:
|
||||||
|
- Check the `ls` output first. If no document title looks relevant, stop — do not read files just to see what's there.
|
||||||
|
- If a title looks relevant, read only the `<chunk_index>` (first ~20 lines) and jump to matched chunks. Do not read entire documents.
|
||||||
|
- Extract only the specific information you need and move on to generating the completion.
|
||||||
|
|
||||||
|
## Reading Documents Efficiently
|
||||||
|
|
||||||
|
Documents are formatted as XML. Each document contains:
|
||||||
|
- `<document_metadata>` — title, type, URL, etc.
|
||||||
|
- `<chunk_index>` — a table of every chunk with its **line range** and a
|
||||||
|
`matched="true"` flag for chunks that matched the search query.
|
||||||
|
- `<document_content>` — the actual chunks in original document order.
|
||||||
|
|
||||||
|
**Workflow**: read the first ~20 lines to see the `<chunk_index>`, identify
|
||||||
|
chunks marked `matched="true"`, then use `read_file(path, offset=<start_line>,
|
||||||
|
limit=<lines>)` to jump directly to those sections."""
|
||||||
|
|
||||||
|
APP_CONTEXT_BLOCK = """
|
||||||
|
|
||||||
|
The user is currently working in "{app_name}" (window: "{window_title}"). Use this to understand the type of application and adapt your tone and format accordingly."""
|
||||||
|
|
||||||
|
|
||||||
|
def _build_autocomplete_system_prompt(app_name: str, window_title: str) -> str:
    """Assemble the system prompt for one autocomplete request.

    Args:
        app_name: Name of the application the user is working in. When it is
            empty the base prompt is returned unchanged, regardless of
            ``window_title``.
        window_title: Title of the focused window; only interpolated into the
            app-context block when ``app_name`` is non-empty.

    Returns:
        ``AUTOCOMPLETE_SYSTEM_PROMPT``, optionally followed by the formatted
        ``APP_CONTEXT_BLOCK``.
    """
    if not app_name:
        return AUTOCOMPLETE_SYSTEM_PROMPT

    app_context = APP_CONTEXT_BLOCK.format(
        app_name=app_name,
        window_title=window_title,
    )
    return AUTOCOMPLETE_SYSTEM_PROMPT + app_context
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Pre-compute KB filesystem (runs in parallel with agent compilation)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class _KBResult:
|
||||||
|
"""Container for pre-computed KB filesystem results."""
|
||||||
|
__slots__ = ("files", "ls_ai_msg", "ls_tool_msg")
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
files: dict[str, Any] | None = None,
|
||||||
|
ls_ai_msg: AIMessage | None = None,
|
||||||
|
ls_tool_msg: ToolMessage | None = None,
|
||||||
|
) -> None:
|
||||||
|
self.files = files
|
||||||
|
self.ls_ai_msg = ls_ai_msg
|
||||||
|
self.ls_tool_msg = ls_tool_msg
|
||||||
|
|
||||||
|
@property
|
||||||
|
def has_documents(self) -> bool:
|
||||||
|
return bool(self.files)
|
||||||
|
|
||||||
|
|
||||||
|
async def precompute_kb_filesystem(
    search_space_id: int,
    query: str,
    top_k: int = KB_TOP_K,
) -> _KBResult:
    """Run the KB search and build the scoped filesystem ahead of the agent.

    Intended to be awaited concurrently with agent graph compilation (see
    ``create_autocomplete_agent``).

    Args:
        search_space_id: Search space to query.
        query: Search text; an empty query short-circuits to an empty result.
        top_k: Maximum number of search results requested.

    Returns:
        A ``_KBResult`` holding the scoped files plus a synthetic ``ls``
        tool-call / tool-reply message pair listing the ``/documents/`` paths.
        An empty ``_KBResult`` is returned when there is no query, no search
        hit, no file, or when anything in the pipeline raises (best effort:
        the failure is logged and autocomplete proceeds without KB context).
    """
    if not query:
        return _KBResult()

    try:
        hits = await search_knowledge_base(
            query=query,
            search_space_id=search_space_id,
            top_k=top_k,
        )

        scoped_files = None
        if hits:
            scoped_files, _ = await build_scoped_filesystem(
                documents=hits,
                search_space_id=search_space_id,
            )
        if not scoped_files:
            return _KBResult()

        # Only non-deleted entries under /documents/ are advertised to the model.
        listed_paths = []
        for path, entry in scoped_files.items():
            if path.startswith("/documents/") and entry is not None:
                listed_paths.append(path)

        # Fabricate an ``ls /documents`` exchange so the agent starts with the
        # listing already in its history.
        call_id = f"auto_ls_{uuid.uuid4().hex[:12]}"
        listing = str(listed_paths) if listed_paths else "No documents found."
        ls_request = AIMessage(
            content="",
            tool_calls=[{"name": "ls", "args": {"path": "/documents"}, "id": call_id}],
        )
        ls_reply = ToolMessage(content=listing, tool_call_id=call_id)
        return _KBResult(files=scoped_files, ls_ai_msg=ls_request, ls_tool_msg=ls_reply)

    except Exception:
        logger.warning("KB pre-computation failed, proceeding without KB", exc_info=True)
        return _KBResult()
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Filesystem middleware — no save_document, no persistence
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class AutocompleteFilesystemMiddleware(SurfSenseFilesystemMiddleware):
    """Filesystem middleware variant used by the autocomplete agent.

    The parent is initialised with ``search_space_id=None`` and
    ``created_by_id=None``, and the ``save_document`` tool is removed from the
    inherited tool list so the agent cannot persist anything to the KB.
    The remaining tools (including ``write_file`` / ``edit_file``) are kept;
    they are intended to stay session-local — that behaviour lives in the
    parent class and is not visible here.
    """

    def __init__(self) -> None:
        super().__init__(search_space_id=None, created_by_id=None)

        kept_tools = []
        for tool in self.tools:
            if tool.name == "save_document":
                continue
            kept_tools.append(tool)
        self.tools = kept_tools
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Agent factory
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
async def _compile_agent(
    llm: BaseChatModel,
    app_name: str,
    window_title: str,
) -> Any:
    """Build the autocomplete agent graph off the event loop.

    The system prompt is the autocomplete prompt (with optional app context)
    followed by ``BASE_AGENT_PROMPT``. ``create_agent`` is executed through
    ``asyncio.to_thread`` so graph construction does not block the loop.

    Args:
        llm: Chat model driving the agent.
        app_name: Application name forwarded to the prompt builder.
        window_title: Window title forwarded to the prompt builder.

    Returns:
        The agent returned by ``create_agent``, bound to a config with
        ``recursion_limit`` set to 200.
    """
    final_system_prompt = "\n\n".join(
        (
            _build_autocomplete_system_prompt(app_name, window_title),
            BASE_AGENT_PROMPT,
        )
    )

    # No standalone tools are passed; the filesystem tools come from the middleware.
    compiled = await asyncio.to_thread(
        create_agent,
        llm,
        system_prompt=final_system_prompt,
        tools=[],
        middleware=[
            AutocompleteFilesystemMiddleware(),
            PatchToolCallsMiddleware(),
            AnthropicPromptCachingMiddleware(unsupported_model_behavior="ignore"),
        ],
    )
    return compiled.with_config({"recursion_limit": 200})
|
||||||
|
|
||||||
|
|
||||||
|
async def create_autocomplete_agent(
    llm: BaseChatModel,
    *,
    search_space_id: int,
    kb_query: str,
    app_name: str = "",
    window_title: str = "",
) -> tuple[Any, _KBResult]:
    """Compile the autocomplete agent and pre-compute the KB concurrently.

    Args:
        llm: Chat model driving the agent.
        search_space_id: Search space used for the KB lookup.
        kb_query: Query text for the KB lookup.
        app_name: Application name used for the system prompt.
        window_title: Window title used for the system prompt.

    Returns:
        ``(agent, kb_result)`` so the caller can inject the pre-computed
        filesystem into the agent's initial state.

    Raises:
        Whatever ``_compile_agent`` raises. In that case (and on cancellation)
        the sibling task is cancelled before the exception propagates.
    """
    compile_task = asyncio.create_task(_compile_agent(llm, app_name, window_title))
    kb_task = asyncio.create_task(precompute_kb_filesystem(search_space_id, kb_query))

    try:
        agent, kb = await asyncio.gather(compile_task, kb_task)
    except BaseException:
        # ``gather`` does not cancel the remaining awaitables when one of them
        # raises; without this the KB search would keep running in the
        # background after the request has already failed.
        for task in (compile_task, kb_task):
            if not task.done():
                task.cancel()
        raise

    return agent, kb
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Streaming helper
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def _text_from_chunk_content(content: Any) -> str:
    """Return the plain text carried by a chat-model chunk's ``content``.

    ``content`` may be a plain string or a list of content blocks. For a list,
    bare strings and ``{"type": "text", "text": ...}`` dicts are concatenated;
    every other block type (tool-use deltas, reasoning blocks, ...) is ignored.
    Anything else yields an empty string.
    """
    if isinstance(content, str):
        return content
    if not isinstance(content, list):
        return ""

    pieces: list[str] = []
    for block in content:
        if isinstance(block, str):
            pieces.append(block)
        elif isinstance(block, dict) and block.get("type") == "text":
            text = block.get("text")
            if isinstance(text, str):
                pieces.append(text)
    return "".join(pieces)


async def stream_autocomplete_agent(
    agent: Any,
    input_data: dict[str, Any],
    streaming_service: VercelStreamingService,
    *,
    emit_message_start: bool = True,
) -> AsyncGenerator[str, None]:
    """Stream agent events as Vercel SSE, with thinking steps for tool calls.

    Args:
        agent: Object exposing ``astream_events(input, config=..., version=...)``.
        input_data: Initial input/state handed to the agent.
        streaming_service: Formatter that turns events into SSE strings.
        emit_message_start: When ``False`` the caller has already sent the
            ``message_start`` event (e.g. to show preparation steps before the
            agent runs), so it is not emitted again.

    Yields:
        Formatted SSE strings: an initial "Generating completion" step, one
        in-progress/complete step pair per tool call, text start/delta/end
        events for model output, then ``finish`` and ``done``. If anything
        raises while streaming, an error event followed by ``done`` is
        yielded instead of ``finish``.
    """
    thread_id = uuid.uuid4().hex
    config = {"configurable": {"thread_id": thread_id}}

    current_text_id: str | None = None
    active_tool_depth = 0
    thinking_step_counter = 0
    tool_step_ids: dict[str, str] = {}  # tool run_id -> thinking step id
    step_titles: dict[str, str] = {}  # thinking step id -> title
    completed_step_ids: set[str] = set()
    last_active_step_id: str | None = None

    def next_thinking_step_id() -> str:
        nonlocal thinking_step_counter
        thinking_step_counter += 1
        return f"autocomplete-step-{thinking_step_counter}"

    def complete_current_step() -> str | None:
        """Format a "complete" event for the active step, if it is still open."""
        nonlocal last_active_step_id
        if last_active_step_id and last_active_step_id not in completed_step_ids:
            completed_step_ids.add(last_active_step_id)
            title = step_titles.get(last_active_step_id, "Done")
            event = streaming_service.format_thinking_step(
                step_id=last_active_step_id,
                title=title,
                status="complete",
            )
            last_active_step_id = None
            return event
        return None

    if emit_message_start:
        yield streaming_service.format_message_start()

    # Emit an initial "Generating completion" step so the UI immediately
    # shows activity once the agent starts its first LLM call.
    gen_step_id = next_thinking_step_id()
    last_active_step_id = gen_step_id
    step_titles[gen_step_id] = "Generating completion"
    yield streaming_service.format_thinking_step(
        step_id=gen_step_id,
        title="Generating completion",
        status="in_progress",
    )

    try:
        async for event in agent.astream_events(input_data, config=config, version="v2"):
            event_type = event.get("event", "")

            if event_type == "on_chat_model_stream":
                # Model output produced while a tool is running is not user-facing.
                if active_tool_depth > 0:
                    continue
                if "surfsense:internal" in event.get("tags", []):
                    continue
                chunk = event.get("data", {}).get("chunk")
                if chunk and hasattr(chunk, "content"):
                    # Accept both string content and lists of content blocks;
                    # the latter would otherwise be dropped silently.
                    text = _text_from_chunk_content(chunk.content)
                    if text:
                        if current_text_id is None:
                            step_event = complete_current_step()
                            if step_event:
                                yield step_event
                            current_text_id = streaming_service.generate_text_id()
                            yield streaming_service.format_text_start(current_text_id)
                        yield streaming_service.format_text_delta(current_text_id, text)

            elif event_type == "on_tool_start":
                active_tool_depth += 1
                tool_name = event.get("name", "unknown_tool")
                run_id = event.get("run_id", "")
                tool_input = event.get("data", {}).get("input", {})

                # Close any open text segment before showing the tool step.
                if current_text_id is not None:
                    yield streaming_service.format_text_end(current_text_id)
                    current_text_id = None

                step_event = complete_current_step()
                if step_event:
                    yield step_event

                tool_step_id = next_thinking_step_id()
                tool_step_ids[run_id] = tool_step_id
                last_active_step_id = tool_step_id

                title, items = _describe_tool_call(tool_name, tool_input)
                step_titles[tool_step_id] = title
                yield streaming_service.format_thinking_step(
                    step_id=tool_step_id,
                    title=title,
                    status="in_progress",
                    items=items,
                )

            elif event_type == "on_tool_end":
                active_tool_depth = max(0, active_tool_depth - 1)
                run_id = event.get("run_id", "")
                step_id = tool_step_ids.pop(run_id, None)
                if step_id and step_id not in completed_step_ids:
                    completed_step_ids.add(step_id)
                    title = step_titles.get(step_id, "Done")
                    yield streaming_service.format_thinking_step(
                        step_id=step_id,
                        title=title,
                        status="complete",
                    )
                    if last_active_step_id == step_id:
                        last_active_step_id = None

        if current_text_id is not None:
            yield streaming_service.format_text_end(current_text_id)
            # Reset so the error handler below cannot emit a second text_end.
            current_text_id = None
        step_event = complete_current_step()
        if step_event:
            yield step_event

        yield streaming_service.format_finish()
        yield streaming_service.format_done()

    except Exception as e:
        logger.exception("Autocomplete agent streaming error: %s", e)
        if current_text_id is not None:
            yield streaming_service.format_text_end(current_text_id)
        yield streaming_service.format_error("Autocomplete failed. Please try again.")
        yield streaming_service.format_done()
|
||||||
|
|
||||||
|
|
||||||
|
def _describe_tool_call(tool_name: str, tool_input: Any) -> tuple[str, list[str]]:
|
||||||
|
"""Return a human-readable (title, items) for a tool call thinking step."""
|
||||||
|
inp = tool_input if isinstance(tool_input, dict) else {}
|
||||||
|
if tool_name == "ls":
|
||||||
|
path = inp.get("path", "/")
|
||||||
|
return "Listing files", [path]
|
||||||
|
if tool_name == "read_file":
|
||||||
|
fp = inp.get("file_path", "")
|
||||||
|
display = fp if len(fp) <= 80 else "…" + fp[-77:]
|
||||||
|
return "Reading file", [display]
|
||||||
|
if tool_name == "write_file":
|
||||||
|
fp = inp.get("file_path", "")
|
||||||
|
display = fp if len(fp) <= 80 else "…" + fp[-77:]
|
||||||
|
return "Writing file", [display]
|
||||||
|
if tool_name == "edit_file":
|
||||||
|
fp = inp.get("file_path", "")
|
||||||
|
display = fp if len(fp) <= 80 else "…" + fp[-77:]
|
||||||
|
return "Editing file", [display]
|
||||||
|
if tool_name == "glob":
|
||||||
|
pat = inp.get("pattern", "")
|
||||||
|
base = inp.get("path", "/")
|
||||||
|
return "Searching files", [f"{pat} in {base}"]
|
||||||
|
if tool_name == "grep":
|
||||||
|
pat = inp.get("pattern", "")
|
||||||
|
path = inp.get("path", "")
|
||||||
|
display_pat = pat[:60] + ("…" if len(pat) > 60 else "")
|
||||||
|
return "Searching content", [f'"{display_pat}"' + (f" in {path}" if path else "")]
|
||||||
|
return f"Using {tool_name}", []
|
||||||
|
|
@ -1,139 +1,40 @@
|
||||||
|
"""Vision autocomplete service — agent-based with scoped filesystem.
|
||||||
|
|
||||||
|
Optimized pipeline:
|
||||||
|
1. Start the SSE stream immediately so the UI shows progress.
|
||||||
|
2. Derive a KB search query from window_title (no separate LLM call).
|
||||||
|
3. Run KB filesystem pre-computation and agent graph compilation in PARALLEL.
|
||||||
|
4. Inject pre-computed KB files as initial state and stream the agent.
|
||||||
|
"""
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
from typing import AsyncGenerator
|
from typing import AsyncGenerator
|
||||||
|
|
||||||
from langchain_core.messages import HumanMessage, SystemMessage
|
from langchain_core.messages import HumanMessage
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
from app.retriever.chunks_hybrid_search import ChucksHybridSearchRetriever
|
from app.agents.autocomplete import create_autocomplete_agent, stream_autocomplete_agent
|
||||||
from app.services.llm_service import get_vision_llm
|
from app.services.llm_service import get_vision_llm
|
||||||
from app.services.new_streaming_service import VercelStreamingService
|
from app.services.new_streaming_service import VercelStreamingService
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
KB_TOP_K = 5
|
PREP_STEP_ID = "autocomplete-prep"
|
||||||
KB_MAX_CHARS = 4000
|
|
||||||
|
|
||||||
EXTRACT_QUERY_PROMPT = """Look at this screenshot and describe in 1-2 short sentences what the user is working on and what topic they need to write about. Be specific about the subject matter. Output ONLY the description, nothing else."""
|
|
||||||
|
|
||||||
EXTRACT_QUERY_PROMPT_WITH_APP = """The user is currently in the application "{app_name}" with the window titled "{window_title}".
|
|
||||||
|
|
||||||
Look at this screenshot and describe in 1-2 short sentences what the user is working on and what topic they need to write about. Be specific about the subject matter. Output ONLY the description, nothing else."""
|
|
||||||
|
|
||||||
VISION_SYSTEM_PROMPT = """You are a smart writing assistant that analyzes the user's screen to draft or complete text.
|
|
||||||
|
|
||||||
You will receive a screenshot of the user's screen. Your job:
|
|
||||||
1. Analyze the ENTIRE screenshot to understand what the user is working on (email thread, chat conversation, document, code editor, form, etc.).
|
|
||||||
2. Identify the text area where the user will type.
|
|
||||||
3. Based on the full visual context, generate the text the user most likely wants to write.
|
|
||||||
|
|
||||||
Key behavior:
|
|
||||||
- If the text area is EMPTY, draft a full response or message based on what you see on screen (e.g., reply to an email, respond to a chat message, continue a document).
|
|
||||||
- If the text area already has text, continue it naturally.
|
|
||||||
|
|
||||||
Rules:
|
|
||||||
- Output ONLY the text to be inserted. No quotes, no explanations, no meta-commentary.
|
|
||||||
- Be concise but complete — a full thought, not a fragment.
|
|
||||||
- Match the tone and formality of the surrounding context.
|
|
||||||
- If the screen shows code, write code. If it shows a casual chat, be casual. If it shows a formal email, be formal.
|
|
||||||
- Do NOT describe the screenshot or explain your reasoning.
|
|
||||||
- If you cannot determine what to write, output nothing."""
|
|
||||||
|
|
||||||
APP_CONTEXT_BLOCK = """
|
|
||||||
|
|
||||||
The user is currently working in "{app_name}" (window: "{window_title}"). Use this to understand the type of application and adapt your tone and format accordingly."""
|
|
||||||
|
|
||||||
KB_CONTEXT_BLOCK = """
|
|
||||||
|
|
||||||
You also have access to the user's knowledge base documents below. Use them to write more accurate, informed, and contextually relevant text. Do NOT cite or reference the documents explicitly — just let the knowledge inform your writing naturally.
|
|
||||||
|
|
||||||
<knowledge_base>
|
|
||||||
{kb_context}
|
|
||||||
</knowledge_base>"""
|
|
||||||
|
|
||||||
|
|
||||||
def _build_system_prompt(app_name: str, window_title: str, kb_context: str) -> str:
|
def _derive_kb_query(app_name: str, window_title: str) -> str:
|
||||||
"""Assemble the system prompt from optional context blocks."""
|
parts = [p for p in (window_title, app_name) if p]
|
||||||
prompt = VISION_SYSTEM_PROMPT
|
return " ".join(parts)
|
||||||
if app_name:
|
|
||||||
prompt += APP_CONTEXT_BLOCK.format(app_name=app_name, window_title=window_title)
|
|
||||||
if kb_context:
|
|
||||||
prompt += KB_CONTEXT_BLOCK.format(kb_context=kb_context)
|
|
||||||
return prompt
|
|
||||||
|
|
||||||
|
|
||||||
def _is_vision_unsupported_error(e: Exception) -> bool:
|
def _is_vision_unsupported_error(e: Exception) -> bool:
|
||||||
"""Check if an exception indicates the model doesn't support vision/images."""
|
|
||||||
msg = str(e).lower()
|
msg = str(e).lower()
|
||||||
return "content must be a string" in msg or "does not support image" in msg
|
return "content must be a string" in msg or "does not support image" in msg
|
||||||
|
|
||||||
|
|
||||||
async def _extract_query_from_screenshot(
|
# ---------------------------------------------------------------------------
|
||||||
llm, screenshot_data_url: str,
|
# Main entry point
|
||||||
app_name: str = "", window_title: str = "",
|
# ---------------------------------------------------------------------------
|
||||||
) -> str | None:
|
|
||||||
"""Ask the Vision LLM to describe what the user is working on.
|
|
||||||
|
|
||||||
Raises vision-unsupported errors so the caller can return a
|
|
||||||
friendly message immediately instead of retrying with astream.
|
|
||||||
"""
|
|
||||||
if app_name:
|
|
||||||
prompt_text = EXTRACT_QUERY_PROMPT_WITH_APP.format(
|
|
||||||
app_name=app_name, window_title=window_title,
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
prompt_text = EXTRACT_QUERY_PROMPT
|
|
||||||
|
|
||||||
try:
|
|
||||||
response = await llm.ainvoke([
|
|
||||||
HumanMessage(content=[
|
|
||||||
{"type": "text", "text": prompt_text},
|
|
||||||
{"type": "image_url", "image_url": {"url": screenshot_data_url}},
|
|
||||||
]),
|
|
||||||
])
|
|
||||||
query = response.content.strip() if hasattr(response, "content") else ""
|
|
||||||
return query if query else None
|
|
||||||
except Exception as e:
|
|
||||||
if _is_vision_unsupported_error(e):
|
|
||||||
raise
|
|
||||||
logger.warning(f"Failed to extract query from screenshot: {e}")
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
async def _search_knowledge_base(
|
|
||||||
session: AsyncSession, search_space_id: int, query: str
|
|
||||||
) -> str:
|
|
||||||
"""Search the KB and return formatted context string."""
|
|
||||||
try:
|
|
||||||
retriever = ChucksHybridSearchRetriever(session)
|
|
||||||
results = await retriever.hybrid_search(
|
|
||||||
query_text=query,
|
|
||||||
top_k=KB_TOP_K,
|
|
||||||
search_space_id=search_space_id,
|
|
||||||
)
|
|
||||||
|
|
||||||
if not results:
|
|
||||||
return ""
|
|
||||||
|
|
||||||
parts: list[str] = []
|
|
||||||
char_count = 0
|
|
||||||
for doc in results:
|
|
||||||
title = doc.get("document", {}).get("title", "Untitled")
|
|
||||||
for chunk in doc.get("chunks", []):
|
|
||||||
content = chunk.get("content", "").strip()
|
|
||||||
if not content:
|
|
||||||
continue
|
|
||||||
entry = f"[{title}]\n{content}"
|
|
||||||
if char_count + len(entry) > KB_MAX_CHARS:
|
|
||||||
break
|
|
||||||
parts.append(entry)
|
|
||||||
char_count += len(entry)
|
|
||||||
if char_count >= KB_MAX_CHARS:
|
|
||||||
break
|
|
||||||
|
|
||||||
return "\n\n---\n\n".join(parts)
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning(f"KB search failed, proceeding without context: {e}")
|
|
||||||
return ""
|
|
||||||
|
|
||||||
|
|
||||||
async def stream_vision_autocomplete(
|
async def stream_vision_autocomplete(
|
||||||
|
|
@ -144,13 +45,7 @@ async def stream_vision_autocomplete(
|
||||||
app_name: str = "",
|
app_name: str = "",
|
||||||
window_title: str = "",
|
window_title: str = "",
|
||||||
) -> AsyncGenerator[str, None]:
|
) -> AsyncGenerator[str, None]:
|
||||||
"""Analyze a screenshot with the vision LLM and stream a text completion.
|
"""Analyze a screenshot with a vision-LLM agent and stream a text completion."""
|
||||||
|
|
||||||
Pipeline:
|
|
||||||
1. Extract a search query from the screenshot (non-streaming)
|
|
||||||
2. Search the knowledge base for relevant context
|
|
||||||
3. Stream the final completion with screenshot + KB + app context
|
|
||||||
"""
|
|
||||||
streaming = VercelStreamingService()
|
streaming = VercelStreamingService()
|
||||||
vision_error_msg = (
|
vision_error_msg = (
|
||||||
"The selected model does not support vision. "
|
"The selected model does not support vision. "
|
||||||
|
|
@ -164,62 +59,89 @@ async def stream_vision_autocomplete(
|
||||||
yield streaming.format_done()
|
yield streaming.format_done()
|
||||||
return
|
return
|
||||||
|
|
||||||
kb_context = ""
|
# Start SSE stream immediately so the UI has something to show
|
||||||
|
yield streaming.format_message_start()
|
||||||
|
|
||||||
|
kb_query = _derive_kb_query(app_name, window_title)
|
||||||
|
|
||||||
|
# Show a preparation step while KB search + agent compile run
|
||||||
|
yield streaming.format_thinking_step(
|
||||||
|
step_id=PREP_STEP_ID,
|
||||||
|
title="Searching knowledge base",
|
||||||
|
status="in_progress",
|
||||||
|
items=[kb_query] if kb_query else [],
|
||||||
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
query = await _extract_query_from_screenshot(
|
agent, kb = await create_autocomplete_agent(
|
||||||
llm, screenshot_data_url, app_name=app_name, window_title=window_title,
|
llm,
|
||||||
|
search_space_id=search_space_id,
|
||||||
|
kb_query=kb_query,
|
||||||
|
app_name=app_name,
|
||||||
|
window_title=window_title,
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(f"Vision autocomplete: selected model does not support vision: {e}")
|
if _is_vision_unsupported_error(e):
|
||||||
yield streaming.format_message_start()
|
logger.warning("Vision autocomplete: model does not support vision: %s", e)
|
||||||
yield streaming.format_error(vision_error_msg)
|
yield streaming.format_error(vision_error_msg)
|
||||||
|
yield streaming.format_done()
|
||||||
|
return
|
||||||
|
logger.error("Failed to create autocomplete agent: %s", e, exc_info=True)
|
||||||
|
yield streaming.format_error("Autocomplete failed. Please try again.")
|
||||||
yield streaming.format_done()
|
yield streaming.format_done()
|
||||||
return
|
return
|
||||||
|
|
||||||
if query:
|
has_kb = kb.has_documents
|
||||||
kb_context = await _search_knowledge_base(session, search_space_id, query)
|
doc_count = len(kb.files) if has_kb else 0 # type: ignore[arg-type]
|
||||||
|
|
||||||
system_prompt = _build_system_prompt(app_name, window_title, kb_context)
|
yield streaming.format_thinking_step(
|
||||||
|
step_id=PREP_STEP_ID,
|
||||||
|
title="Searching knowledge base",
|
||||||
|
status="complete",
|
||||||
|
items=[f"Found {doc_count} document{'s' if doc_count != 1 else ''}"] if kb_query else ["Skipped"],
|
||||||
|
)
|
||||||
|
|
||||||
messages = [
|
# Build agent input with pre-computed KB as initial state
|
||||||
SystemMessage(content=system_prompt),
|
if has_kb:
|
||||||
HumanMessage(content=[
|
instruction = (
|
||||||
{
|
"Analyze this screenshot, then explore the knowledge base documents "
|
||||||
"type": "text",
|
"listed above — read the chunk index of any document whose title "
|
||||||
"text": "Analyze this screenshot. Understand the full context of what the user is working on, then generate the text they most likely want to write in the active text area.",
|
"looks relevant and check matched chunks for useful facts. "
|
||||||
},
|
"Finally, generate a concise autocomplete for the active text area, "
|
||||||
{
|
"enhanced with any relevant KB information you found."
|
||||||
"type": "image_url",
|
)
|
||||||
"image_url": {"url": screenshot_data_url},
|
else:
|
||||||
},
|
instruction = (
|
||||||
]),
|
"Analyze this screenshot and generate a concise autocomplete "
|
||||||
]
|
"for the active text area based on what you see."
|
||||||
|
)
|
||||||
|
|
||||||
text_started = False
|
user_message = HumanMessage(content=[
|
||||||
text_id = ""
|
{"type": "text", "text": instruction},
|
||||||
|
{"type": "image_url", "image_url": {"url": screenshot_data_url}},
|
||||||
|
])
|
||||||
|
|
||||||
|
input_data: dict = {"messages": [user_message]}
|
||||||
|
|
||||||
|
if has_kb:
|
||||||
|
input_data["files"] = kb.files
|
||||||
|
input_data["messages"] = [kb.ls_ai_msg, kb.ls_tool_msg, user_message]
|
||||||
|
logger.info("Autocomplete: injected %d KB files into agent initial state", doc_count)
|
||||||
|
else:
|
||||||
|
logger.info("Autocomplete: no KB documents found, proceeding with screenshot only")
|
||||||
|
|
||||||
|
# Stream the agent (message_start already sent above)
|
||||||
try:
|
try:
|
||||||
yield streaming.format_message_start()
|
async for sse in stream_autocomplete_agent(
|
||||||
text_id = streaming.generate_text_id()
|
agent, input_data, streaming, emit_message_start=False,
|
||||||
yield streaming.format_text_start(text_id)
|
):
|
||||||
text_started = True
|
yield sse
|
||||||
|
|
||||||
async for chunk in llm.astream(messages):
|
|
||||||
token = chunk.content if hasattr(chunk, "content") else str(chunk)
|
|
||||||
if token:
|
|
||||||
yield streaming.format_text_delta(text_id, token)
|
|
||||||
|
|
||||||
yield streaming.format_text_end(text_id)
|
|
||||||
yield streaming.format_finish()
|
|
||||||
yield streaming.format_done()
|
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
if text_started:
|
|
||||||
yield streaming.format_text_end(text_id)
|
|
||||||
|
|
||||||
if _is_vision_unsupported_error(e):
|
if _is_vision_unsupported_error(e):
|
||||||
logger.warning(f"Vision autocomplete: selected model does not support vision: {e}")
|
logger.warning("Vision autocomplete: model does not support vision: %s", e)
|
||||||
yield streaming.format_error(vision_error_msg)
|
yield streaming.format_error(vision_error_msg)
|
||||||
|
yield streaming.format_done()
|
||||||
else:
|
else:
|
||||||
logger.error(f"Vision autocomplete streaming error: {e}", exc_info=True)
|
logger.error("Vision autocomplete streaming error: %s", e, exc_info=True)
|
||||||
yield streaming.format_error("Autocomplete failed. Please try again.")
|
yield streaming.format_error("Autocomplete failed. Please try again.")
|
||||||
yield streaming.format_done()
|
yield streaming.format_done()
|
||||||
|
|
|
||||||
|
|
@ -10,7 +10,18 @@ type SSEEvent =
|
||||||
| { type: "text-end"; id: string }
|
| { type: "text-end"; id: string }
|
||||||
| { type: "start"; messageId: string }
|
| { type: "start"; messageId: string }
|
||||||
| { type: "finish" }
|
| { type: "finish" }
|
||||||
| { type: "error"; errorText: string };
|
| { type: "error"; errorText: string }
|
||||||
|
| {
|
||||||
|
type: "data-thinking-step";
|
||||||
|
data: { id: string; title: string; status: string; items: string[] };
|
||||||
|
};
|
||||||
|
|
||||||
|
interface AgentStep {
|
||||||
|
id: string;
|
||||||
|
title: string;
|
||||||
|
status: string;
|
||||||
|
items: string[];
|
||||||
|
}
|
||||||
|
|
||||||
function friendlyError(raw: string | number): string {
|
function friendlyError(raw: string | number): string {
|
||||||
if (typeof raw === "number") {
|
if (typeof raw === "number") {
|
||||||
|
|
@ -34,11 +45,24 @@ function friendlyError(raw: string | number): string {
|
||||||
|
|
||||||
const AUTO_DISMISS_MS = 3000;
|
const AUTO_DISMISS_MS = 3000;
|
||||||
|
|
||||||
|
function StepIcon({ status }: { status: string }) {
|
||||||
|
if (status === "complete") {
|
||||||
|
return (
|
||||||
|
<svg className="step-icon step-icon-done" viewBox="0 0 16 16" fill="none">
|
||||||
|
<circle cx="8" cy="8" r="7" stroke="#4ade80" strokeWidth="1.5" />
|
||||||
|
<path d="M5 8.5l2 2 4-4.5" stroke="#4ade80" strokeWidth="1.5" strokeLinecap="round" strokeLinejoin="round" />
|
||||||
|
</svg>
|
||||||
|
);
|
||||||
|
}
|
||||||
|
return <span className="step-spinner" />;
|
||||||
|
}
|
||||||
|
|
||||||
export default function SuggestionPage() {
|
export default function SuggestionPage() {
|
||||||
const api = useElectronAPI();
|
const api = useElectronAPI();
|
||||||
const [suggestion, setSuggestion] = useState("");
|
const [suggestion, setSuggestion] = useState("");
|
||||||
const [isLoading, setIsLoading] = useState(true);
|
const [isLoading, setIsLoading] = useState(true);
|
||||||
const [error, setError] = useState<string | null>(null);
|
const [error, setError] = useState<string | null>(null);
|
||||||
|
const [steps, setSteps] = useState<AgentStep[]>([]);
|
||||||
const abortRef = useRef<AbortController | null>(null);
|
const abortRef = useRef<AbortController | null>(null);
|
||||||
|
|
||||||
const isDesktop = !!api?.onAutocompleteContext;
|
const isDesktop = !!api?.onAutocompleteContext;
|
||||||
|
|
@ -66,6 +90,7 @@ export default function SuggestionPage() {
|
||||||
setIsLoading(true);
|
setIsLoading(true);
|
||||||
setSuggestion("");
|
setSuggestion("");
|
||||||
setError(null);
|
setError(null);
|
||||||
|
setSteps([]);
|
||||||
|
|
||||||
let token = getBearerToken();
|
let token = getBearerToken();
|
||||||
if (!token) {
|
if (!token) {
|
||||||
|
|
@ -137,6 +162,17 @@ export default function SuggestionPage() {
|
||||||
setSuggestion((prev) => prev + parsed.delta);
|
setSuggestion((prev) => prev + parsed.delta);
|
||||||
} else if (parsed.type === "error") {
|
} else if (parsed.type === "error") {
|
||||||
setError(friendlyError(parsed.errorText));
|
setError(friendlyError(parsed.errorText));
|
||||||
|
} else if (parsed.type === "data-thinking-step") {
|
||||||
|
const { id, title, status, items } = parsed.data;
|
||||||
|
setSteps((prev) => {
|
||||||
|
const existing = prev.findIndex((s) => s.id === id);
|
||||||
|
if (existing >= 0) {
|
||||||
|
const updated = [...prev];
|
||||||
|
updated[existing] = { id, title, status, items };
|
||||||
|
return updated;
|
||||||
|
}
|
||||||
|
return [...prev, { id, title, status, items }];
|
||||||
|
});
|
||||||
}
|
}
|
||||||
} catch {
|
} catch {
|
||||||
continue;
|
continue;
|
||||||
|
|
@ -185,13 +221,33 @@ export default function SuggestionPage() {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (isLoading && !suggestion) {
|
const showLoading = isLoading && !suggestion;
|
||||||
|
|
||||||
|
if (showLoading) {
|
||||||
return (
|
return (
|
||||||
<div className="suggestion-tooltip">
|
<div className="suggestion-tooltip">
|
||||||
<div className="suggestion-loading">
|
<div className="agent-activity">
|
||||||
<span className="suggestion-dot" />
|
{steps.length === 0 && (
|
||||||
<span className="suggestion-dot" />
|
<div className="activity-initial">
|
||||||
<span className="suggestion-dot" />
|
<span className="step-spinner" />
|
||||||
|
<span className="activity-label">Preparing…</span>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
{steps.length > 0 && (
|
||||||
|
<div className="activity-steps">
|
||||||
|
{steps.map((step) => (
|
||||||
|
<div key={step.id} className="activity-step">
|
||||||
|
<StepIcon status={step.status} />
|
||||||
|
<span className="step-label">
|
||||||
|
{step.title}
|
||||||
|
{step.items.length > 0 && (
|
||||||
|
<span className="step-detail"> · {step.items[0]}</span>
|
||||||
|
)}
|
||||||
|
</span>
|
||||||
|
</div>
|
||||||
|
))}
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
);
|
);
|
||||||
|
|
|
||||||
|
|
@ -19,13 +19,21 @@ body:has(.suggestion-body) {
|
||||||
}
|
}
|
||||||
|
|
||||||
.suggestion-tooltip {
|
.suggestion-tooltip {
|
||||||
|
box-sizing: border-box;
|
||||||
background: #1e1e1e;
|
background: #1e1e1e;
|
||||||
border: 1px solid #3c3c3c;
|
border: 1px solid #3c3c3c;
|
||||||
border-radius: 8px;
|
border-radius: 8px;
|
||||||
padding: 8px 12px;
|
padding: 8px 12px;
|
||||||
margin: 4px;
|
margin: 4px;
|
||||||
max-width: 400px;
|
max-width: 400px;
|
||||||
|
/* MAX_HEIGHT in suggestion-window.ts is 400px. Subtract 8px for margin
|
||||||
|
(4px * 2) so the tooltip + margin fits within the Electron window.
|
||||||
|
box-sizing: border-box ensures padding + border are included. */
|
||||||
|
max-height: 392px;
|
||||||
box-shadow: 0 4px 16px rgba(0, 0, 0, 0.5);
|
box-shadow: 0 4px 16px rgba(0, 0, 0, 0.5);
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
overflow: hidden;
|
||||||
}
|
}
|
||||||
|
|
||||||
.suggestion-text {
|
.suggestion-text {
|
||||||
|
|
@ -35,6 +43,26 @@ body:has(.suggestion-body) {
|
||||||
margin: 0 0 6px 0;
|
margin: 0 0 6px 0;
|
||||||
word-wrap: break-word;
|
word-wrap: break-word;
|
||||||
white-space: pre-wrap;
|
white-space: pre-wrap;
|
||||||
|
overflow-y: auto;
|
||||||
|
flex: 1 1 auto;
|
||||||
|
min-height: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
.suggestion-text::-webkit-scrollbar {
|
||||||
|
width: 5px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.suggestion-text::-webkit-scrollbar-track {
|
||||||
|
background: transparent;
|
||||||
|
}
|
||||||
|
|
||||||
|
.suggestion-text::-webkit-scrollbar-thumb {
|
||||||
|
background: #555;
|
||||||
|
border-radius: 3px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.suggestion-text::-webkit-scrollbar-thumb:hover {
|
||||||
|
background: #777;
|
||||||
}
|
}
|
||||||
|
|
||||||
.suggestion-actions {
|
.suggestion-actions {
|
||||||
|
|
@ -43,6 +71,7 @@ body:has(.suggestion-body) {
|
||||||
gap: 4px;
|
gap: 4px;
|
||||||
border-top: 1px solid #2a2a2a;
|
border-top: 1px solid #2a2a2a;
|
||||||
padding-top: 6px;
|
padding-top: 6px;
|
||||||
|
flex-shrink: 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
.suggestion-btn {
|
.suggestion-btn {
|
||||||
|
|
@ -86,36 +115,77 @@ body:has(.suggestion-body) {
|
||||||
font-size: 12px;
|
font-size: 12px;
|
||||||
}
|
}
|
||||||
|
|
||||||
.suggestion-loading {
|
/* --- Agent activity indicator --- */
|
||||||
|
|
||||||
|
.agent-activity {
|
||||||
display: flex;
|
display: flex;
|
||||||
gap: 5px;
|
flex-direction: column;
|
||||||
|
gap: 4px;
|
||||||
|
overflow-y: auto;
|
||||||
|
max-height: 340px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.activity-initial {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
gap: 8px;
|
||||||
padding: 2px 0;
|
padding: 2px 0;
|
||||||
justify-content: center;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
.suggestion-dot {
|
.activity-label {
|
||||||
width: 4px;
|
color: #a1a1aa;
|
||||||
height: 4px;
|
font-size: 12px;
|
||||||
|
white-space: nowrap;
|
||||||
|
overflow: hidden;
|
||||||
|
text-overflow: ellipsis;
|
||||||
|
}
|
||||||
|
|
||||||
|
.activity-steps {
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
gap: 3px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.activity-step {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
gap: 6px;
|
||||||
|
min-height: 18px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.step-label {
|
||||||
|
color: #d4d4d4;
|
||||||
|
font-size: 12px;
|
||||||
|
white-space: nowrap;
|
||||||
|
overflow: hidden;
|
||||||
|
text-overflow: ellipsis;
|
||||||
|
}
|
||||||
|
|
||||||
|
.step-detail {
|
||||||
|
color: #71717a;
|
||||||
|
font-size: 11px;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Spinner (in_progress) */
|
||||||
|
.step-spinner {
|
||||||
|
width: 14px;
|
||||||
|
height: 14px;
|
||||||
|
flex-shrink: 0;
|
||||||
|
border: 1.5px solid #3f3f46;
|
||||||
|
border-top-color: #a78bfa;
|
||||||
border-radius: 50%;
|
border-radius: 50%;
|
||||||
background: #666;
|
animation: step-spin 0.7s linear infinite;
|
||||||
animation: suggestion-pulse 1.2s infinite ease-in-out;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
.suggestion-dot:nth-child(2) {
|
/* Checkmark icon (complete) */
|
||||||
animation-delay: 0.15s;
|
.step-icon {
|
||||||
|
width: 14px;
|
||||||
|
height: 14px;
|
||||||
|
flex-shrink: 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
.suggestion-dot:nth-child(3) {
|
@keyframes step-spin {
|
||||||
animation-delay: 0.3s;
|
to {
|
||||||
}
|
transform: rotate(360deg);
|
||||||
|
|
||||||
@keyframes suggestion-pulse {
|
|
||||||
0%, 80%, 100% {
|
|
||||||
opacity: 0.3;
|
|
||||||
transform: scale(0.8);
|
|
||||||
}
|
|
||||||
40% {
|
|
||||||
opacity: 1;
|
|
||||||
transform: scale(1.1);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -92,15 +92,7 @@ import { useMediaQuery } from "@/hooks/use-media-query";
|
||||||
import { useElectronAPI } from "@/hooks/use-platform";
|
import { useElectronAPI } from "@/hooks/use-platform";
|
||||||
import { cn } from "@/lib/utils";
|
import { cn } from "@/lib/utils";
|
||||||
|
|
||||||
/** Placeholder texts that cycle in new chats when input is empty */
|
const COMPOSER_PLACEHOLDER = "Ask anything · Type / for prompts · Type @ to mention docs";
|
||||||
const CYCLING_PLACEHOLDERS = [
|
|
||||||
"Ask SurfSense anything or @mention docs",
|
|
||||||
"Generate a podcast from my vacation ideas in Notion",
|
|
||||||
"Sum up last week's meeting notes from Drive in a bulleted list",
|
|
||||||
"Give me a brief overview of the most urgent tickets in Jira and Linear",
|
|
||||||
"Briefly, what are today's top ten important emails and calendar events?",
|
|
||||||
"Check if this week's Slack messages reference any GitHub issues",
|
|
||||||
];
|
|
||||||
|
|
||||||
export const Thread: FC = () => {
|
export const Thread: FC = () => {
|
||||||
return <ThreadContent />;
|
return <ThreadContent />;
|
||||||
|
|
@ -380,29 +372,7 @@ const Composer: FC = () => {
|
||||||
const isThreadEmpty = useAuiState(({ thread }) => thread.isEmpty);
|
const isThreadEmpty = useAuiState(({ thread }) => thread.isEmpty);
|
||||||
const isThreadRunning = useAuiState(({ thread }) => thread.isRunning);
|
const isThreadRunning = useAuiState(({ thread }) => thread.isRunning);
|
||||||
|
|
||||||
// Cycling placeholder state - only cycles in new chats
|
const currentPlaceholder = COMPOSER_PLACEHOLDER;
|
||||||
const [placeholderIndex, setPlaceholderIndex] = useState(0);
|
|
||||||
|
|
||||||
// Cycle through placeholders every 4 seconds when thread is empty (new chat)
|
|
||||||
useEffect(() => {
|
|
||||||
// Only cycle when thread is empty (new chat)
|
|
||||||
if (!isThreadEmpty) {
|
|
||||||
// Reset to first placeholder when chat becomes active
|
|
||||||
setPlaceholderIndex(0);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
const intervalId = setInterval(() => {
|
|
||||||
setPlaceholderIndex((prev) => (prev + 1) % CYCLING_PLACEHOLDERS.length);
|
|
||||||
}, 6000);
|
|
||||||
|
|
||||||
return () => clearInterval(intervalId);
|
|
||||||
}, [isThreadEmpty]);
|
|
||||||
|
|
||||||
// Compute current placeholder - only cycle in new chats
|
|
||||||
const currentPlaceholder = isThreadEmpty
|
|
||||||
? CYCLING_PLACEHOLDERS[placeholderIndex]
|
|
||||||
: CYCLING_PLACEHOLDERS[0];
|
|
||||||
|
|
||||||
// Live collaboration state
|
// Live collaboration state
|
||||||
const { data: currentUser } = useAtomValue(currentUserAtom);
|
const { data: currentUser } = useAtomValue(currentUserAtom);
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue