Merge remote-tracking branch 'upstream/dev' into feat/unified-etl-pipeline

This commit is contained in:
Anish Sarkar 2026-04-06 22:04:51 +05:30
commit 63a75052ca
76 changed files with 3041 additions and 376 deletions

View file

@ -3,6 +3,7 @@ from fastapi import APIRouter
from .airtable_add_connector_route import (
router as airtable_add_connector_router,
)
from .autocomplete_routes import router as autocomplete_router
from .chat_comments_routes import router as chat_comments_router
from .circleback_webhook_route import router as circleback_webhook_router
from .clickup_add_connector_route import router as clickup_add_connector_router
@ -95,3 +96,4 @@ router.include_router(incentive_tasks_router) # Incentive tasks for earning fre
router.include_router(stripe_router) # Stripe checkout for additional page packs
router.include_router(youtube_router) # YouTube playlist resolution
router.include_router(prompts_router)
router.include_router(autocomplete_router) # Lightweight autocomplete with KB context

View file

@ -1,7 +1,5 @@
import base64
import hashlib
import logging
import secrets
from datetime import UTC, datetime, timedelta
from uuid import UUID
@ -26,7 +24,11 @@ from app.utils.connector_naming import (
check_duplicate_connector,
generate_unique_connector_name,
)
from app.utils.oauth_security import OAuthStateManager, TokenEncryption
from app.utils.oauth_security import (
OAuthStateManager,
TokenEncryption,
generate_pkce_pair,
)
logger = logging.getLogger(__name__)
@ -75,28 +77,6 @@ def make_basic_auth_header(client_id: str, client_secret: str) -> str:
return f"Basic {b64}"
def generate_pkce_pair() -> tuple[str, str]:
"""
Generate PKCE code verifier and code challenge.
Returns:
Tuple of (code_verifier, code_challenge)
"""
# Generate code verifier (43-128 characters)
code_verifier = (
base64.urlsafe_b64encode(secrets.token_bytes(32)).decode("utf-8").rstrip("=")
)
# Generate code challenge (SHA256 hash of verifier, base64url encoded)
code_challenge = (
base64.urlsafe_b64encode(hashlib.sha256(code_verifier.encode("utf-8")).digest())
.decode("utf-8")
.rstrip("=")
)
return code_verifier, code_challenge
@router.get("/auth/airtable/connector/add")
async def connect_airtable(space_id: int, user: User = Depends(current_active_user)):
"""

View file

@ -0,0 +1,42 @@
from fastapi import APIRouter, Depends, HTTPException
from fastapi.responses import StreamingResponse
from pydantic import BaseModel, Field
from sqlalchemy.ext.asyncio import AsyncSession
from app.db import User, get_async_session
from app.services.new_streaming_service import VercelStreamingService
from app.services.vision_autocomplete_service import stream_vision_autocomplete
from app.users import current_active_user
from app.utils.rbac import check_search_space_access
router = APIRouter(prefix="/autocomplete", tags=["autocomplete"])
MAX_SCREENSHOT_SIZE = 20 * 1024 * 1024 # 20 MB base64 ceiling
class VisionAutocompleteRequest(BaseModel):
screenshot: str = Field(..., max_length=MAX_SCREENSHOT_SIZE)
search_space_id: int
app_name: str = ""
window_title: str = ""
@router.post("/vision/stream")
async def vision_autocomplete_stream(
body: VisionAutocompleteRequest,
user: User = Depends(current_active_user),
session: AsyncSession = Depends(get_async_session),
):
await check_search_space_access(session, user, body.search_space_id)
return StreamingResponse(
stream_vision_autocomplete(
body.screenshot, body.search_space_id, session,
app_name=body.app_name, window_title=body.window_title,
),
media_type="text/event-stream",
headers={
**VercelStreamingService.get_response_headers(),
"X-Accel-Buffering": "no",
},
)

View file

@ -28,7 +28,11 @@ from app.utils.connector_naming import (
check_duplicate_connector,
generate_unique_connector_name,
)
from app.utils.oauth_security import OAuthStateManager, TokenEncryption
from app.utils.oauth_security import (
OAuthStateManager,
TokenEncryption,
generate_code_verifier,
)
logger = logging.getLogger(__name__)
@ -96,9 +100,14 @@ async def connect_calendar(space_id: int, user: User = Depends(current_active_us
flow = get_google_flow()
# Generate secure state parameter with HMAC signature
code_verifier = generate_code_verifier()
flow.code_verifier = code_verifier
# Generate secure state parameter with HMAC signature (includes PKCE code_verifier)
state_manager = get_state_manager()
state_encoded = state_manager.generate_secure_state(space_id, user.id)
state_encoded = state_manager.generate_secure_state(
space_id, user.id, code_verifier=code_verifier
)
auth_url, _ = flow.authorization_url(
access_type="offline",
@ -146,8 +155,11 @@ async def reauth_calendar(
flow = get_google_flow()
code_verifier = generate_code_verifier()
flow.code_verifier = code_verifier
state_manager = get_state_manager()
extra: dict = {"connector_id": connector_id}
extra: dict = {"connector_id": connector_id, "code_verifier": code_verifier}
if return_url and return_url.startswith("/"):
extra["return_url"] = return_url
state_encoded = state_manager.generate_secure_state(space_id, user.id, **extra)
@ -225,6 +237,7 @@ async def calendar_callback(
user_id = UUID(data["user_id"])
space_id = data["space_id"]
code_verifier = data.get("code_verifier")
# Validate redirect URI (security: ensure it matches configured value)
if not config.GOOGLE_CALENDAR_REDIRECT_URI:
@ -233,6 +246,7 @@ async def calendar_callback(
)
flow = get_google_flow()
flow.code_verifier = code_verifier
flow.fetch_token(code=code)
creds = flow.credentials

View file

@ -41,7 +41,11 @@ from app.utils.connector_naming import (
check_duplicate_connector,
generate_unique_connector_name,
)
from app.utils.oauth_security import OAuthStateManager, TokenEncryption
from app.utils.oauth_security import (
OAuthStateManager,
TokenEncryption,
generate_code_verifier,
)
# Relax token scope validation for Google OAuth
os.environ["OAUTHLIB_RELAX_TOKEN_SCOPE"] = "1"
@ -127,14 +131,19 @@ async def connect_drive(space_id: int, user: User = Depends(current_active_user)
flow = get_google_flow()
# Generate secure state parameter with HMAC signature
code_verifier = generate_code_verifier()
flow.code_verifier = code_verifier
# Generate secure state parameter with HMAC signature (includes PKCE code_verifier)
state_manager = get_state_manager()
state_encoded = state_manager.generate_secure_state(space_id, user.id)
state_encoded = state_manager.generate_secure_state(
space_id, user.id, code_verifier=code_verifier
)
# Generate authorization URL
auth_url, _ = flow.authorization_url(
access_type="offline", # Get refresh token
prompt="consent", # Force consent screen to get refresh token
access_type="offline",
prompt="consent",
include_granted_scopes="true",
state=state_encoded,
)
@ -193,8 +202,11 @@ async def reauth_drive(
flow = get_google_flow()
code_verifier = generate_code_verifier()
flow.code_verifier = code_verifier
state_manager = get_state_manager()
extra: dict = {"connector_id": connector_id}
extra: dict = {"connector_id": connector_id, "code_verifier": code_verifier}
if return_url and return_url.startswith("/"):
extra["return_url"] = return_url
state_encoded = state_manager.generate_secure_state(space_id, user.id, **extra)
@ -285,6 +297,7 @@ async def drive_callback(
space_id = data["space_id"]
reauth_connector_id = data.get("connector_id")
reauth_return_url = data.get("return_url")
code_verifier = data.get("code_verifier")
logger.info(
f"Processing Google Drive callback for user {user_id}, space {space_id}"
@ -296,8 +309,9 @@ async def drive_callback(
status_code=500, detail="GOOGLE_DRIVE_REDIRECT_URI not configured"
)
# Exchange authorization code for tokens
# Exchange authorization code for tokens (restore PKCE code_verifier from state)
flow = get_google_flow()
flow.code_verifier = code_verifier
flow.fetch_token(code=code)
creds = flow.credentials

View file

@ -28,7 +28,11 @@ from app.utils.connector_naming import (
check_duplicate_connector,
generate_unique_connector_name,
)
from app.utils.oauth_security import OAuthStateManager, TokenEncryption
from app.utils.oauth_security import (
OAuthStateManager,
TokenEncryption,
generate_code_verifier,
)
logger = logging.getLogger(__name__)
@ -109,9 +113,14 @@ async def connect_gmail(space_id: int, user: User = Depends(current_active_user)
flow = get_google_flow()
# Generate secure state parameter with HMAC signature
code_verifier = generate_code_verifier()
flow.code_verifier = code_verifier
# Generate secure state parameter with HMAC signature (includes PKCE code_verifier)
state_manager = get_state_manager()
state_encoded = state_manager.generate_secure_state(space_id, user.id)
state_encoded = state_manager.generate_secure_state(
space_id, user.id, code_verifier=code_verifier
)
auth_url, _ = flow.authorization_url(
access_type="offline",
@ -164,8 +173,11 @@ async def reauth_gmail(
flow = get_google_flow()
code_verifier = generate_code_verifier()
flow.code_verifier = code_verifier
state_manager = get_state_manager()
extra: dict = {"connector_id": connector_id}
extra: dict = {"connector_id": connector_id, "code_verifier": code_verifier}
if return_url and return_url.startswith("/"):
extra["return_url"] = return_url
state_encoded = state_manager.generate_secure_state(space_id, user.id, **extra)
@ -256,6 +268,7 @@ async def gmail_callback(
user_id = UUID(data["user_id"])
space_id = data["space_id"]
code_verifier = data.get("code_verifier")
# Validate redirect URI (security: ensure it matches configured value)
if not config.GOOGLE_GMAIL_REDIRECT_URI:
@ -264,6 +277,7 @@ async def gmail_callback(
)
flow = get_google_flow()
flow.code_verifier = code_verifier
flow.fetch_token(code=code)
creds = flow.credentials

View file

@ -522,14 +522,17 @@ async def get_llm_preferences(
image_generation_config = await _get_image_gen_config_by_id(
session, search_space.image_generation_config_id
)
vision_llm = await _get_llm_config_by_id(session, search_space.vision_llm_id)
return LLMPreferencesRead(
agent_llm_id=search_space.agent_llm_id,
document_summary_llm_id=search_space.document_summary_llm_id,
image_generation_config_id=search_space.image_generation_config_id,
vision_llm_id=search_space.vision_llm_id,
agent_llm=agent_llm,
document_summary_llm=document_summary_llm,
image_generation_config=image_generation_config,
vision_llm=vision_llm,
)
except HTTPException:
@ -589,14 +592,17 @@ async def update_llm_preferences(
image_generation_config = await _get_image_gen_config_by_id(
session, search_space.image_generation_config_id
)
vision_llm = await _get_llm_config_by_id(session, search_space.vision_llm_id)
return LLMPreferencesRead(
agent_llm_id=search_space.agent_llm_id,
document_summary_llm_id=search_space.document_summary_llm_id,
image_generation_config_id=search_space.image_generation_config_id,
vision_llm_id=search_space.vision_llm_id,
agent_llm=agent_llm,
document_summary_llm=document_summary_llm,
image_generation_config=image_generation_config,
vision_llm=vision_llm,
)
except HTTPException: