diff --git a/surfsense_backend/app/routes/__init__.py b/surfsense_backend/app/routes/__init__.py
index 1937f11cb..a063b5976 100644
--- a/surfsense_backend/app/routes/__init__.py
+++ b/surfsense_backend/app/routes/__init__.py
@@ -3,6 +3,7 @@ from fastapi import APIRouter
 from .airtable_add_connector_route import (
     router as airtable_add_connector_router,
 )
+from .autocomplete_routes import router as autocomplete_router
 from .chat_comments_routes import router as chat_comments_router
 from .circleback_webhook_route import router as circleback_webhook_router
 from .clickup_add_connector_route import router as clickup_add_connector_router
@@ -95,3 +96,4 @@ router.include_router(incentive_tasks_router)  # Incentive tasks for earning fre
 router.include_router(stripe_router)  # Stripe checkout for additional page packs
 router.include_router(youtube_router)  # YouTube playlist resolution
 router.include_router(prompts_router)
+router.include_router(autocomplete_router)  # Lightweight autocomplete with KB context
diff --git a/surfsense_backend/app/routes/autocomplete_routes.py b/surfsense_backend/app/routes/autocomplete_routes.py
new file mode 100644
index 000000000..9a285a723
--- /dev/null
+++ b/surfsense_backend/app/routes/autocomplete_routes.py
@@ -0,0 +1,136 @@
+import logging
+from typing import AsyncGenerator
+
+from fastapi import APIRouter, Depends, Query
+from fastapi.responses import StreamingResponse
+from langchain_core.messages import HumanMessage, SystemMessage
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.db import User, get_async_session
+from app.retriever.chunks_hybrid_search import ChucksHybridSearchRetriever
+from app.services.llm_service import get_agent_llm
+from app.services.new_streaming_service import VercelStreamingService
+from app.users import current_active_user
+
+logger = logging.getLogger(__name__)
+
+router = APIRouter(prefix="/autocomplete", tags=["autocomplete"])
+
+AUTOCOMPLETE_SYSTEM_PROMPT = """You are an inline text autocomplete engine. Your job is to complete the user's text naturally.
+
+Rules:
+- Output ONLY the continuation text. Do NOT repeat what the user already typed.
+- Keep completions concise: 1-3 sentences maximum.
+- Match the user's tone, style, and language.
+- If knowledge base context is provided, use it to make the completion factually accurate and personalized.
+- Do NOT add quotes, explanations, or meta-commentary.
+- Do NOT start with a space unless grammatically required.
+- If you cannot produce a useful completion, output nothing."""
+
+KB_CONTEXT_TEMPLATE = """
+Relevant knowledge base context (use this to personalize the completion):
+---
+{kb_context}
+---
+"""
+
+
+async def _stream_autocomplete(
+    text: str,
+    cursor_position: int,
+    search_space_id: int,
+    session: AsyncSession,
+) -> AsyncGenerator[str, None]:
+    """Stream an autocomplete response with KB context."""
+    streaming_service = VercelStreamingService()
+
+    try:
+        # Text before cursor is what we're completing
+        text_before_cursor = text[:cursor_position] if cursor_position >= 0 else text
+
+        if not text_before_cursor.strip():
+            yield streaming_service.format_message_start()
+            yield streaming_service.format_finish()
+            yield streaming_service.format_done()
+            return
+
+        # Fast KB lookup: vector-only search, top 3 chunks, no planner LLM
+        kb_context = ""
+        try:
+            retriever = ChucksHybridSearchRetriever(session)
+            chunks = await retriever.vector_search(
+                query_text=text_before_cursor[-200:],  # last 200 chars for relevance
+                top_k=3,
+                search_space_id=search_space_id,
+            )
+            if chunks:
+                kb_snippets = []
+                for chunk in chunks:
+                    content = getattr(chunk, "content", None) or getattr(chunk, "chunk_text", "")
+                    if content:
+                        kb_snippets.append(content[:300])
+                if kb_snippets:
+                    kb_context = KB_CONTEXT_TEMPLATE.format(
+                        kb_context="\n\n".join(kb_snippets)
+                    )
+        except Exception as e:
+            logger.warning(f"KB search failed for autocomplete, proceeding without context: {e}")
+
+        # Get the search space's configured LLM
+        llm = await get_agent_llm(session, search_space_id)
+        if not llm:
+            yield streaming_service.format_message_start()
+            error_msg = "No LLM configured for this search space"
+            yield streaming_service.format_error(error_msg)
+            yield streaming_service.format_done()
+            return
+
+        system_prompt = AUTOCOMPLETE_SYSTEM_PROMPT
+        if kb_context:
+            system_prompt += kb_context
+
+        messages = [
+            SystemMessage(content=system_prompt),
+            HumanMessage(content=f"Complete this text:\n{text_before_cursor}"),
+        ]
+
+        # Stream the response
+        yield streaming_service.format_message_start()
+        text_id = streaming_service.generate_text_id()
+        yield streaming_service.format_text_start(text_id)
+
+        async for chunk in llm.astream(messages):
+            token = chunk.content if hasattr(chunk, "content") else str(chunk)
+            if token:
+                yield streaming_service.format_text_delta(text_id, token)
+
+        yield streaming_service.format_text_end(text_id)
+        yield streaming_service.format_finish()
+        yield streaming_service.format_done()
+
+    except Exception as e:
+        logger.error(f"Autocomplete streaming error: {e}")
+        yield streaming_service.format_error(str(e))
+        yield streaming_service.format_done()
+
+
+@router.post("/stream")
+async def autocomplete_stream(
+    text: str = Query(..., description="Current text in the input field"),
+    cursor_position: int = Query(-1, description="Cursor position in the text (-1 for end)"),
+    search_space_id: int = Query(..., description="Search space ID for KB context and LLM config"),
+    user: User = Depends(current_active_user),
+    session: AsyncSession = Depends(get_async_session),
+):
+    """Stream an autocomplete suggestion based on the current text and KB context."""
+    if cursor_position < 0:
+        cursor_position = len(text)
+
+    return StreamingResponse(
+        _stream_autocomplete(text, cursor_position, search_space_id, session),
+        media_type="text/event-stream",
+        headers={
+            **VercelStreamingService.get_response_headers(),
+            "X-Accel-Buffering": "no",
+        },
+    )
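For reviewers who want to exercise the new route by hand, here is a minimal smoke-test sketch, not part of the change itself. It assumes a dev server at http://localhost:8000, a placeholder bearer token, and an existing search space with id 1; the query parameter names come from the `Query(...)` definitions above, while the exact shape of the streamed frames is defined by `VercelStreamingService` on the backend.

```python
# Hypothetical smoke test for POST /autocomplete/stream; the base URL, token,
# and search_space_id=1 are placeholder assumptions, not values from this PR.
import asyncio

import httpx


async def main() -> None:
    params = {
        "text": "The quarterly report shows that our",
        "cursor_position": -1,  # -1 means "complete from the end of the text"
        "search_space_id": 1,
    }
    headers = {"Authorization": "Bearer <token>"}  # route requires an active user
    async with httpx.AsyncClient(base_url="http://localhost:8000") as client:
        # The route takes everything as query parameters, so the POST body is empty.
        async with client.stream(
            "POST", "/autocomplete/stream", params=params, headers=headers
        ) as response:
            response.raise_for_status()
            # Print the raw streamed frames as they arrive.
            async for line in response.aiter_lines():
                if line:
                    print(line)


if __name__ == "__main__":
    asyncio.run(main())
```

Passing `cursor_position=-1` exercises the default branch in `autocomplete_stream`, which clamps it to `len(text)` so the completion continues from the end of the input.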