Merge pull request #1130 from CREDO23/feat/vision-autocomplete

[Feat] Vision-based autocomplete with KB grounding
This commit is contained in:
Rohan Verma 2026-04-05 13:24:55 -07:00 committed by GitHub
commit 74bf3df880
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
32 changed files with 1482 additions and 33 deletions

View file

@ -3,6 +3,7 @@ from fastapi import APIRouter
from .airtable_add_connector_route import (
router as airtable_add_connector_router,
)
from .autocomplete_routes import router as autocomplete_router
from .chat_comments_routes import router as chat_comments_router
from .circleback_webhook_route import router as circleback_webhook_router
from .clickup_add_connector_route import router as clickup_add_connector_router
@ -95,3 +96,4 @@ router.include_router(incentive_tasks_router) # Incentive tasks for earning fre
router.include_router(stripe_router) # Stripe checkout for additional page packs
router.include_router(youtube_router) # YouTube playlist resolution
router.include_router(prompts_router)
router.include_router(autocomplete_router) # Lightweight autocomplete with KB context

View file

@ -0,0 +1,42 @@
from fastapi import APIRouter, Depends, HTTPException
from fastapi.responses import StreamingResponse
from pydantic import BaseModel, Field
from sqlalchemy.ext.asyncio import AsyncSession
from app.db import User, get_async_session
from app.services.new_streaming_service import VercelStreamingService
from app.services.vision_autocomplete_service import stream_vision_autocomplete
from app.users import current_active_user
from app.utils.rbac import check_search_space_access
# Router for the autocomplete endpoints: every route registered on it is
# served under the "/autocomplete" prefix and tagged "autocomplete".
router = APIRouter(prefix="/autocomplete", tags=["autocomplete"])
# Upper bound on the screenshot payload: 20 * 1024 * 1024 = 20,971,520.
# Applied as the ``max_length`` of ``VisionAutocompleteRequest.screenshot``,
# so it limits the length of the encoded string, not the decoded image.
MAX_SCREENSHOT_SIZE = 20 * 1024 * 1024  # 20 MB base64 ceiling
class VisionAutocompleteRequest(BaseModel):
    """Request body accepted by the vision autocomplete streaming route."""

    # Required screenshot payload, sent as a string; validation rejects
    # values longer than MAX_SCREENSHOT_SIZE characters.
    screenshot: str = Field(default=..., max_length=MAX_SCREENSHOT_SIZE)
    # Required id of the search space the request targets.
    search_space_id: int
    # Optional context fields; each defaults to the empty string when omitted.
    app_name: str = ""
    window_title: str = ""
@router.post("/vision/stream")
async def vision_autocomplete_stream(
    body: VisionAutocompleteRequest,
    user: User = Depends(current_active_user),
    session: AsyncSession = Depends(get_async_session),
):
    """Stream vision autocomplete output as a ``text/event-stream`` response.

    The caller's access to ``body.search_space_id`` is checked first; only
    then is the streaming generator created and wrapped in a response.

    Args:
        body: Validated request payload (screenshot, target search space and
            optional app/window context).
        user: User resolved by the ``current_active_user`` dependency.
        session: Async database session from ``get_async_session``; used for
            the access check and handed on to the streaming generator.

    Returns:
        A ``StreamingResponse`` whose body is produced by
        ``stream_vision_autocomplete``.
    """
    # Gate on search-space access before any streaming work is set up.
    # (presumably raises when access is denied — confirm in app.utils.rbac)
    await check_search_space_access(session, user, body.search_space_id)

    # NOTE(review): the dependency-scoped session is consumed while the
    # response streams — confirm it stays open for the stream's lifetime.
    event_stream = stream_vision_autocomplete(
        body.screenshot,
        body.search_space_id,
        session,
        app_name=body.app_name,
        window_title=body.window_title,
    )

    # Start from the service's standard streaming headers, then ask buffering
    # reverse proxies (e.g. nginx) to pass chunks through immediately.
    response_headers = {
        **VercelStreamingService.get_response_headers(),
        "X-Accel-Buffering": "no",
    }

    return StreamingResponse(
        event_stream,
        media_type="text/event-stream",
        headers=response_headers,
    )

View file

@ -522,14 +522,17 @@ async def get_llm_preferences(
image_generation_config = await _get_image_gen_config_by_id(
session, search_space.image_generation_config_id
)
vision_llm = await _get_llm_config_by_id(session, search_space.vision_llm_id)
return LLMPreferencesRead(
agent_llm_id=search_space.agent_llm_id,
document_summary_llm_id=search_space.document_summary_llm_id,
image_generation_config_id=search_space.image_generation_config_id,
vision_llm_id=search_space.vision_llm_id,
agent_llm=agent_llm,
document_summary_llm=document_summary_llm,
image_generation_config=image_generation_config,
vision_llm=vision_llm,
)
except HTTPException:
@ -589,14 +592,17 @@ async def update_llm_preferences(
image_generation_config = await _get_image_gen_config_by_id(
session, search_space.image_generation_config_id
)
vision_llm = await _get_llm_config_by_id(session, search_space.vision_llm_id)
return LLMPreferencesRead(
agent_llm_id=search_space.agent_llm_id,
document_summary_llm_id=search_space.document_summary_llm_id,
image_generation_config_id=search_space.image_generation_config_id,
vision_llm_id=search_space.vision_llm_id,
agent_llm=agent_llm,
document_summary_llm=document_summary_llm,
image_generation_config=image_generation_config,
vision_llm=vision_llm,
)
except HTTPException: