Merge remote-tracking branch 'upstream/dev' into feat/obsidian-plugin

2026-05-01 03:46:25 +02:00 · 2026-04-24 21:34:55 +05:30 · 2026-04-24 21:34:55 +05:30 · 9b1b9a90c0
commit 9b1b9a90c0
parent 336bd57c4d a0f2851784
175 changed files with 10592 additions and 2302 deletions
--- a/surfsense_backend/app/tasks/celery_tasks/connector_tasks.py
+++ b/surfsense_backend/app/tasks/celery_tasks/connector_tasks.py
@ -39,52 +39,6 @@ def _handle_greenlet_error(e: Exception, task_name: str, connector_id: int) -> N
        )


-@celery_app.task(name="index_slack_messages", bind=True)
-def index_slack_messages_task(
-    self,
-    connector_id: int,
-    search_space_id: int,
-    user_id: str,
-    start_date: str,
-    end_date: str,
-):
-    """Celery task to index Slack messages."""
-    import asyncio
-
-    loop = asyncio.new_event_loop()
-    asyncio.set_event_loop(loop)
-
-    try:
-        loop.run_until_complete(
-            _index_slack_messages(
-                connector_id, search_space_id, user_id, start_date, end_date
-            )
-        )
-    except Exception as e:
-        _handle_greenlet_error(e, "index_slack_messages", connector_id)
-        raise
-    finally:
-        loop.close()
-
-
-async def _index_slack_messages(
-    connector_id: int,
-    search_space_id: int,
-    user_id: str,
-    start_date: str,
-    end_date: str,
-):
-    """Index Slack messages with new session."""
-    from app.routes.search_source_connectors_routes import (
-        run_slack_indexing,
-    )
-
-    async with get_celery_session_maker()() as session:
-        await run_slack_indexing(
-            session, connector_id, search_space_id, user_id, start_date, end_date
-        )
-
-
@celery_app.task(name="index_notion_pages", bind=True)
 def index_notion_pages_task(
    self,
@ -174,92 +128,6 @@ async def _index_github_repos(
        )


-@celery_app.task(name="index_linear_issues", bind=True)
-def index_linear_issues_task(
-    self,
-    connector_id: int,
-    search_space_id: int,
-    user_id: str,
-    start_date: str,
-    end_date: str,
-):
-    """Celery task to index Linear issues."""
-    import asyncio
-
-    loop = asyncio.new_event_loop()
-    asyncio.set_event_loop(loop)
-
-    try:
-        loop.run_until_complete(
-            _index_linear_issues(
-                connector_id, search_space_id, user_id, start_date, end_date
-            )
-        )
-    finally:
-        loop.close()
-
-
-async def _index_linear_issues(
-    connector_id: int,
-    search_space_id: int,
-    user_id: str,
-    start_date: str,
-    end_date: str,
-):
-    """Index Linear issues with new session."""
-    from app.routes.search_source_connectors_routes import (
-        run_linear_indexing,
-    )
-
-    async with get_celery_session_maker()() as session:
-        await run_linear_indexing(
-            session, connector_id, search_space_id, user_id, start_date, end_date
-        )
-
-
-@celery_app.task(name="index_jira_issues", bind=True)
-def index_jira_issues_task(
-    self,
-    connector_id: int,
-    search_space_id: int,
-    user_id: str,
-    start_date: str,
-    end_date: str,
-):
-    """Celery task to index Jira issues."""
-    import asyncio
-
-    loop = asyncio.new_event_loop()
-    asyncio.set_event_loop(loop)
-
-    try:
-        loop.run_until_complete(
-            _index_jira_issues(
-                connector_id, search_space_id, user_id, start_date, end_date
-            )
-        )
-    finally:
-        loop.close()
-
-
-async def _index_jira_issues(
-    connector_id: int,
-    search_space_id: int,
-    user_id: str,
-    start_date: str,
-    end_date: str,
-):
-    """Index Jira issues with new session."""
-    from app.routes.search_source_connectors_routes import (
-        run_jira_indexing,
-    )
-
-    async with get_celery_session_maker()() as session:
-        await run_jira_indexing(
-            session, connector_id, search_space_id, user_id, start_date, end_date
-        )
-
-
@celery_app.task(name="index_confluence_pages", bind=True)
 def index_confluence_pages_task(
    self,
@ -303,49 +171,6 @@ async def _index_confluence_pages(
        )


-@celery_app.task(name="index_clickup_tasks", bind=True)
-def index_clickup_tasks_task(
-    self,
-    connector_id: int,
-    search_space_id: int,
-    user_id: str,
-    start_date: str,
-    end_date: str,
-):
-    """Celery task to index ClickUp tasks."""
-    import asyncio
-
-    loop = asyncio.new_event_loop()
-    asyncio.set_event_loop(loop)
-
-    try:
-        loop.run_until_complete(
-            _index_clickup_tasks(
-                connector_id, search_space_id, user_id, start_date, end_date
-            )
-        )
-    finally:
-        loop.close()
-
-
-async def _index_clickup_tasks(
-    connector_id: int,
-    search_space_id: int,
-    user_id: str,
-    start_date: str,
-    end_date: str,
-):
-    """Index ClickUp tasks with new session."""
-    from app.routes.search_source_connectors_routes import (
-        run_clickup_indexing,
-    )
-
-    async with get_celery_session_maker()() as session:
-        await run_clickup_indexing(
-            session, connector_id, search_space_id, user_id, start_date, end_date
-        )
-
-
@celery_app.task(name="index_google_calendar_events", bind=True)
 def index_google_calendar_events_task(
    self,
@ -392,49 +217,6 @@ async def _index_google_calendar_events(
        )


-@celery_app.task(name="index_airtable_records", bind=True)
-def index_airtable_records_task(
-    self,
-    connector_id: int,
-    search_space_id: int,
-    user_id: str,
-    start_date: str,
-    end_date: str,
-):
-    """Celery task to index Airtable records."""
-    import asyncio
-
-    loop = asyncio.new_event_loop()
-    asyncio.set_event_loop(loop)
-
-    try:
-        loop.run_until_complete(
-            _index_airtable_records(
-                connector_id, search_space_id, user_id, start_date, end_date
-            )
-        )
-    finally:
-        loop.close()
-
-
-async def _index_airtable_records(
-    connector_id: int,
-    search_space_id: int,
-    user_id: str,
-    start_date: str,
-    end_date: str,
-):
-    """Index Airtable records with new session."""
-    from app.routes.search_source_connectors_routes import (
-        run_airtable_indexing,
-    )
-
-    async with get_celery_session_maker()() as session:
-        await run_airtable_indexing(
-            session, connector_id, search_space_id, user_id, start_date, end_date
-        )
-
-
@celery_app.task(name="index_google_gmail_messages", bind=True)
 def index_google_gmail_messages_task(
    self,
@ -622,135 +404,6 @@ async def _index_dropbox_files(
        )


-@celery_app.task(name="index_discord_messages", bind=True)
-def index_discord_messages_task(
-    self,
-    connector_id: int,
-    search_space_id: int,
-    user_id: str,
-    start_date: str,
-    end_date: str,
-):
-    """Celery task to index Discord messages."""
-    import asyncio
-
-    loop = asyncio.new_event_loop()
-    asyncio.set_event_loop(loop)
-
-    try:
-        loop.run_until_complete(
-            _index_discord_messages(
-                connector_id, search_space_id, user_id, start_date, end_date
-            )
-        )
-    finally:
-        loop.close()
-
-
-async def _index_discord_messages(
-    connector_id: int,
-    search_space_id: int,
-    user_id: str,
-    start_date: str,
-    end_date: str,
-):
-    """Index Discord messages with new session."""
-    from app.routes.search_source_connectors_routes import (
-        run_discord_indexing,
-    )
-
-    async with get_celery_session_maker()() as session:
-        await run_discord_indexing(
-            session, connector_id, search_space_id, user_id, start_date, end_date
-        )
-
-
-@celery_app.task(name="index_teams_messages", bind=True)
-def index_teams_messages_task(
-    self,
-    connector_id: int,
-    search_space_id: int,
-    user_id: str,
-    start_date: str,
-    end_date: str,
-):
-    """Celery task to index Microsoft Teams messages."""
-    import asyncio
-
-    loop = asyncio.new_event_loop()
-    asyncio.set_event_loop(loop)
-
-    try:
-        loop.run_until_complete(
-            _index_teams_messages(
-                connector_id, search_space_id, user_id, start_date, end_date
-            )
-        )
-    finally:
-        loop.close()
-
-
-async def _index_teams_messages(
-    connector_id: int,
-    search_space_id: int,
-    user_id: str,
-    start_date: str,
-    end_date: str,
-):
-    """Index Microsoft Teams messages with new session."""
-    from app.routes.search_source_connectors_routes import (
-        run_teams_indexing,
-    )
-
-    async with get_celery_session_maker()() as session:
-        await run_teams_indexing(
-            session, connector_id, search_space_id, user_id, start_date, end_date
-        )
-
-
-@celery_app.task(name="index_luma_events", bind=True)
-def index_luma_events_task(
-    self,
-    connector_id: int,
-    search_space_id: int,
-    user_id: str,
-    start_date: str,
-    end_date: str,
-):
-    """Celery task to index Luma events."""
-    import asyncio
-
-    loop = asyncio.new_event_loop()
-    asyncio.set_event_loop(loop)
-
-    try:
-        loop.run_until_complete(
-            _index_luma_events(
-                connector_id, search_space_id, user_id, start_date, end_date
-            )
-        )
-    finally:
-        loop.close()
-
-
-async def _index_luma_events(
-    connector_id: int,
-    search_space_id: int,
-    user_id: str,
-    start_date: str,
-    end_date: str,
-):
-    """Index Luma events with new session."""
-    from app.routes.search_source_connectors_routes import (
-        run_luma_indexing,
-    )
-
-    async with get_celery_session_maker()() as session:
-        await run_luma_indexing(
-            session, connector_id, search_space_id, user_id, start_date, end_date
-        )
-
-
@celery_app.task(name="index_elasticsearch_documents", bind=True)
 def index_elasticsearch_documents_task(
    self,
--- a/surfsense_backend/app/tasks/celery_tasks/schedule_checker_task.py
+++ b/surfsense_backend/app/tasks/celery_tasks/schedule_checker_task.py
@ -51,50 +51,51 @@ async def _check_and_trigger_schedules():

            logger.info(f"Found {len(due_connectors)} connectors due for indexing")

-            # Import all indexing tasks
+            # Import indexing tasks for KB connectors only.
+            # Live connectors (Linear, Slack, Jira, ClickUp, Airtable, Discord,
+            # Teams, Gmail, Calendar, Luma) use real-time tools instead.
            from app.tasks.celery_tasks.connector_tasks import (
-                index_airtable_records_task,
-                index_clickup_tasks_task,
                index_confluence_pages_task,
                index_crawled_urls_task,
-                index_discord_messages_task,
                index_elasticsearch_documents_task,
                index_github_repos_task,
-                index_google_calendar_events_task,
                index_google_drive_files_task,
-                index_google_gmail_messages_task,
-                index_jira_issues_task,
-                index_linear_issues_task,
-                index_luma_events_task,
                index_notion_pages_task,
-                index_slack_messages_task,
            )

-            # Map connector types to their tasks
            task_map = {
-                SearchSourceConnectorType.SLACK_CONNECTOR: index_slack_messages_task,
                SearchSourceConnectorType.NOTION_CONNECTOR: index_notion_pages_task,
                SearchSourceConnectorType.GITHUB_CONNECTOR: index_github_repos_task,
-                SearchSourceConnectorType.LINEAR_CONNECTOR: index_linear_issues_task,
-                SearchSourceConnectorType.JIRA_CONNECTOR: index_jira_issues_task,
                SearchSourceConnectorType.CONFLUENCE_CONNECTOR: index_confluence_pages_task,
-                SearchSourceConnectorType.CLICKUP_CONNECTOR: index_clickup_tasks_task,
-                SearchSourceConnectorType.GOOGLE_CALENDAR_CONNECTOR: index_google_calendar_events_task,
-                SearchSourceConnectorType.AIRTABLE_CONNECTOR: index_airtable_records_task,
-                SearchSourceConnectorType.GOOGLE_GMAIL_CONNECTOR: index_google_gmail_messages_task,
-                SearchSourceConnectorType.DISCORD_CONNECTOR: index_discord_messages_task,
-                SearchSourceConnectorType.LUMA_CONNECTOR: index_luma_events_task,
                SearchSourceConnectorType.ELASTICSEARCH_CONNECTOR: index_elasticsearch_documents_task,
                SearchSourceConnectorType.WEBCRAWLER_CONNECTOR: index_crawled_urls_task,
                SearchSourceConnectorType.GOOGLE_DRIVE_CONNECTOR: index_google_drive_files_task,
-                # Composio connector types (unified with native Google tasks)
                SearchSourceConnectorType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR: index_google_drive_files_task,
-                SearchSourceConnectorType.COMPOSIO_GMAIL_CONNECTOR: index_google_gmail_messages_task,
-                SearchSourceConnectorType.COMPOSIO_GOOGLE_CALENDAR_CONNECTOR: index_google_calendar_events_task,
            }

+            from app.services.mcp_oauth.registry import LIVE_CONNECTOR_TYPES
+
+            # Disable obsolete periodic indexing for live connectors in one batch.
+            live_disabled = []
+            for connector in due_connectors:
+                if connector.connector_type in LIVE_CONNECTOR_TYPES:
+                    connector.periodic_indexing_enabled = False
+                    connector.next_scheduled_at = None
+                    live_disabled.append(connector)
+            if live_disabled:
+                await session.commit()
+                for c in live_disabled:
+                    logger.info(
+                        "Disabled obsolete periodic indexing for live connector %s (%s)",
+                        c.id,
+                        c.connector_type.value,
+                    )
+
            # Trigger indexing for each due connector
            for connector in due_connectors:
+                if connector in live_disabled:
+                    continue
+
                # Primary guard: Redis lock indicates a task is currently running.
                if is_connector_indexing_locked(connector.id):
                    logger.info(
--- a/surfsense_backend/app/tasks/chat/stream_new_chat.py
+++ b/surfsense_backend/app/tasks/chat/stream_new_chat.py
@ -30,6 +30,8 @@ from sqlalchemy.orm import selectinload

 from app.agents.new_chat.chat_deepagent import create_surfsense_deep_agent
 from app.agents.new_chat.checkpointer import get_checkpointer
+from app.agents.new_chat.filesystem_selection import FilesystemSelection
+from app.config import config
 from app.agents.new_chat.llm_config import (
    AgentConfig,
    create_chat_litellm_from_agent_config,
@ -145,6 +147,102 @@ class StreamResult:
    interrupt_value: dict[str, Any] | None = None
    sandbox_files: list[str] = field(default_factory=list)
    agent_called_update_memory: bool = False
+    request_id: str | None = None
+    turn_id: str = ""
+    filesystem_mode: str = "cloud"
+    client_platform: str = "web"
+    intent_detected: str = "chat_only"
+    intent_confidence: float = 0.0
+    write_attempted: bool = False
+    write_succeeded: bool = False
+    verification_succeeded: bool = False
+    commit_gate_passed: bool = True
+    commit_gate_reason: str = ""
+
+
+def _safe_float(value: Any, default: float = 0.0) -> float:
+    """Coerce ``value`` to ``float``, falling back to ``default``.
+
+    Args:
+        value: Anything accepted by ``float()``.
+        default: Returned when the conversion is not possible.
+
+    Returns:
+        ``float(value)``, or ``default`` when ``float()`` raises
+        ``TypeError`` or ``ValueError`` (e.g. ``None`` or a non-numeric
+        string). Other exceptions are not caught and propagate.
+    """
+    try:
+        return float(value)
+    except (TypeError, ValueError):
+        return default
+
+
+def _tool_output_to_text(tool_output: Any) -> str:
+    """Render a tool output as a single string.
+
+    For a dict, the lookup order is: the ``"result"`` value if it is a
+    string, then the ``"error"`` value if it is a string, otherwise the
+    whole dict serialized as JSON (non-ASCII characters kept as-is).
+    Any non-dict value is converted with ``str()``.
+    """
+    if isinstance(tool_output, dict):
+        # A string "result" takes precedence over a string "error".
+        if isinstance(tool_output.get("result"), str):
+            return tool_output["result"]
+        if isinstance(tool_output.get("error"), str):
+            return tool_output["error"]
+        # Neither key holds a string: fall back to dumping the full dict.
+        return json.dumps(tool_output, ensure_ascii=False)
+    return str(tool_output)
+
+
+def _tool_output_has_error(tool_output: Any) -> bool:
+    """Return True when a tool output looks like a failure.
+
+    A dict counts as an error when its ``"error"`` value is truthy, or when
+    its ``"result"`` value is a string that starts with ``"error:"`` after
+    stripping surrounding whitespace and lowercasing. A plain string is
+    checked with the same ``"error:"`` prefix rule. Every other type is
+    treated as "no error".
+    """
+    if isinstance(tool_output, dict):
+        if tool_output.get("error"):
+            return True
+        result = tool_output.get("result")
+        # Case-insensitive "error:" prefix on the textual result.
+        if isinstance(result, str) and result.strip().lower().startswith("error:"):
+            return True
+        return False
+    if isinstance(tool_output, str):
+        return tool_output.strip().lower().startswith("error:")
+    return False
+
+
+def _extract_resolved_file_path(*, tool_name: str, tool_output: Any) -> str | None:
+    """Best-effort extraction of the file path a write/edit tool acted on.
+
+    Args:
+        tool_name: Name of the tool that produced ``tool_output``; only
+            ``"write_file"`` and ``"edit_file"`` trigger text parsing.
+        tool_output: Raw tool output (dict or any other value).
+
+    Returns:
+        The stripped path, or ``None`` when no path could be found.
+    """
+    # Preferred source: an explicit, non-blank string under the "path" key.
+    if isinstance(tool_output, dict):
+        path_value = tool_output.get("path")
+        if isinstance(path_value, str) and path_value.strip():
+            return path_value.strip()
+    # Otherwise parse the textual form of the output.
+    text = _tool_output_to_text(tool_output)
+    if tool_name == "write_file":
+        # Captures what follows "Updated file" up to the end of the stripped
+        # text; "." does not match newlines, so the capture is on the final line.
+        match = re.search(r"Updated file\s+(.+)$", text.strip())
+        if match:
+            return match.group(1).strip()
+    if tool_name == "edit_file":
+        # Captures the first single-quoted value that follows "in ".
+        match = re.search(r"in '([^']+)'", text)
+        if match:
+            return match.group(1).strip()
+    return None
+
+
+def _contract_enforcement_active(result: StreamResult) -> bool:
+    """Return True when ``result.filesystem_mode`` is ``"desktop_local_folder"``.
+
+    This is the only condition checked; no environment or configuration
+    value is consulted.
+    """
+    # Keep policy deterministic with no env-driven progression modes:
+    # enforce the file-operation contract only in desktop local-folder mode.
+    return result.filesystem_mode == "desktop_local_folder"
+
+
+def _evaluate_file_contract_outcome(result: StreamResult) -> tuple[bool, str]:
+    """Decide whether the turn satisfied the file-write contract.
+
+    Returns:
+        ``(passed, reason)``. ``reason`` is ``""`` when ``passed`` is True,
+        otherwise one of ``"no_write_attempt"``, ``"write_failed"`` or
+        ``"verification_failed"``. The checks run in that order and the
+        first failing one determines the reason.
+    """
+    # Turns whose detected intent is not "file_write" pass unconditionally.
+    if result.intent_detected != "file_write":
+        return True, ""
+    if not result.write_attempted:
+        return False, "no_write_attempt"
+    if not result.write_succeeded:
+        return False, "write_failed"
+    if not result.verification_succeeded:
+        return False, "verification_failed"
+    return True, ""
+
+
+def _log_file_contract(stage: str, result: StreamResult, **extra: Any) -> None:
+    """Emit one JSON-encoded ``[file_operation_contract]`` info log line.
+
+    Args:
+        stage: Label stored under the ``"stage"`` key of the payload.
+        result: Stream result whose contract-related fields are logged.
+        **extra: Additional key/value pairs merged into the payload; a key
+            that matches a built-in field overrides that field.
+    """
+    payload: dict[str, Any] = {
+        "stage": stage,
+        "request_id": result.request_id or "unknown",
+        "turn_id": result.turn_id or "unknown",
+        # chat_id is the part of turn_id before the first ":"; "unknown" when
+        # turn_id contains no ":" at all.
+        "chat_id": result.turn_id.split(":", 1)[0] if ":" in result.turn_id else "unknown",
+        "filesystem_mode": result.filesystem_mode,
+        "client_platform": result.client_platform,
+        "intent_detected": result.intent_detected,
+        "intent_confidence": result.intent_confidence,
+        "write_attempted": result.write_attempted,
+        "write_succeeded": result.write_succeeded,
+        "verification_succeeded": result.verification_succeeded,
+        "commit_gate_passed": result.commit_gate_passed,
+        # An empty reason is logged as JSON null rather than "".
+        "commit_gate_reason": result.commit_gate_reason or None,
+    }
+    payload.update(extra)
+    _perf_log.info("[file_operation_contract] %s", json.dumps(payload, ensure_ascii=False))


 async def _stream_agent_events(
@ -239,6 +337,8 @@ async def _stream_agent_events(
            tool_name = event.get("name", "unknown_tool")
            run_id = event.get("run_id", "")
            tool_input = event.get("data", {}).get("input", {})
+            if tool_name in ("write_file", "edit_file"):
+                result.write_attempted = True

            if current_text_id is not None:
                yield streaming_service.format_text_end(current_text_id)
@ -514,6 +614,14 @@ async def _stream_agent_events(
            else:
                tool_output = {"result": str(raw_output) if raw_output else "completed"}

+            if tool_name in ("write_file", "edit_file"):
+                if _tool_output_has_error(tool_output):
+                    # Keep successful evidence if a previous write/edit in this turn succeeded.
+                    pass
+                else:
+                    result.write_succeeded = True
+                    result.verification_succeeded = True
+
            tool_call_id = f"call_{run_id[:32]}" if run_id else "call_unknown"
            original_step_id = tool_step_ids.get(
                run_id, f"{step_prefix}-unknown-{run_id[:8]}"
@ -925,6 +1033,30 @@ async def _stream_agent_events(
                        f"Scrape failed: {error_msg}",
                        "error",
                    )
+            elif tool_name in ("write_file", "edit_file"):
+                resolved_path = _extract_resolved_file_path(
+                    tool_name=tool_name,
+                    tool_output=tool_output,
+                )
+                result_text = _tool_output_to_text(tool_output)
+                if _tool_output_has_error(tool_output):
+                    yield streaming_service.format_tool_output_available(
+                        tool_call_id,
+                        {
+                            "status": "error",
+                            "error": result_text,
+                            "path": resolved_path,
+                        },
+                    )
+                else:
+                    yield streaming_service.format_tool_output_available(
+                        tool_call_id,
+                        {
+                            "status": "completed",
+                            "path": resolved_path,
+                            "result": result_text,
+                        },
+                    )
            elif tool_name == "generate_report":
                # Stream the full report result so frontend can render the ReportCard
                yield streaming_service.format_tool_output_available(
@ -1143,10 +1275,59 @@ async def _stream_agent_events(
    if completion_event:
        yield completion_event

+    state = await agent.aget_state(config)
+    state_values = getattr(state, "values", {}) or {}
+    contract_state = state_values.get("file_operation_contract") or {}
+    contract_turn_id = contract_state.get("turn_id")
+    current_turn_id = config.get("configurable", {}).get("turn_id", "")
+    intent_value = contract_state.get("intent")
+    if (
+        isinstance(intent_value, str)
+        and intent_value in ("chat_only", "file_write", "file_read")
+        and contract_turn_id == current_turn_id
+    ):
+        result.intent_detected = intent_value
+    if (
+        isinstance(intent_value, str)
+        and intent_value in (
+            "chat_only",
+            "file_write",
+            "file_read",
+        )
+        and contract_turn_id != current_turn_id
+    ):
+        # Ignore stale intent contracts from previous turns/checkpoints.
+        result.intent_detected = "chat_only"
+    result.intent_confidence = (
+        _safe_float(contract_state.get("confidence"), default=0.0)
+        if contract_turn_id == current_turn_id
+        else 0.0
+    )
+
+    if result.intent_detected == "file_write":
+        result.commit_gate_passed, result.commit_gate_reason = (
+            _evaluate_file_contract_outcome(result)
+        )
+        if not result.commit_gate_passed:
+            if _contract_enforcement_active(result):
+                gate_notice = (
+                    "I could not complete the requested file write because no successful "
+                    "write_file/edit_file operation was confirmed."
+                )
+                gate_text_id = streaming_service.generate_text_id()
+                yield streaming_service.format_text_start(gate_text_id)
+                yield streaming_service.format_text_delta(gate_text_id, gate_notice)
+                yield streaming_service.format_text_end(gate_text_id)
+                yield streaming_service.format_terminal_info(gate_notice, "error")
+                accumulated_text = gate_notice
+    else:
+        result.commit_gate_passed = True
+        result.commit_gate_reason = ""
+
    result.accumulated_text = accumulated_text
    result.agent_called_update_memory = called_update_memory
+    _log_file_contract("turn_outcome", result)

-    state = await agent.aget_state(config)
    is_interrupted = state.tasks and any(task.interrupts for task in state.tasks)
    if is_interrupted:
        result.is_interrupted = True
@ -1167,6 +1348,8 @@ async def stream_new_chat(
    thread_visibility: ChatVisibility | None = None,
    current_user_display_name: str | None = None,
    disabled_tools: list[str] | None = None,
+    filesystem_selection: FilesystemSelection | None = None,
+    request_id: str | None = None,
 ) -> AsyncGenerator[str, None]:
    """
    Stream chat responses from the new SurfSense deep agent.
@ -1194,6 +1377,20 @@ async def stream_new_chat(
    streaming_service = VercelStreamingService()
    stream_result = StreamResult()
    _t_total = time.perf_counter()
+    fs_mode = filesystem_selection.mode.value if filesystem_selection else "cloud"
+    fs_platform = (
+        filesystem_selection.client_platform.value if filesystem_selection else "web"
+    )
+    stream_result.request_id = request_id
+    stream_result.turn_id = f"{chat_id}:{int(time.time() * 1000)}"
+    stream_result.filesystem_mode = fs_mode
+    stream_result.client_platform = fs_platform
+    _log_file_contract("turn_start", stream_result)
+    _perf_log.info(
+        "[stream_new_chat] filesystem_mode=%s client_platform=%s",
+        fs_mode,
+        fs_platform,
+    )
    log_system_snapshot("stream_new_chat_START")

    from app.services.token_tracking_service import start_turn
@ -1329,6 +1526,7 @@ async def stream_new_chat(
            thread_visibility=visibility,
            disabled_tools=disabled_tools,
            mentioned_document_ids=mentioned_document_ids,
+            filesystem_selection=filesystem_selection,
        )
        _perf_log.info(
            "[stream_new_chat] Agent created in %.3fs", time.perf_counter() - _t0
@ -1435,6 +1633,8 @@ async def stream_new_chat(
            # We will use this to simulate group chat functionality in the future
            "messages": langchain_messages,
            "search_space_id": search_space_id,
+            "request_id": request_id or "unknown",
+            "turn_id": stream_result.turn_id,
        }

        _perf_log.info(
@ -1464,6 +1664,8 @@ async def stream_new_chat(
        # Configure LangGraph with thread_id for memory
        # If checkpoint_id is provided, fork from that checkpoint (for edit/reload)
        configurable = {"thread_id": str(chat_id)}
+        configurable["request_id"] = request_id or "unknown"
+        configurable["turn_id"] = stream_result.turn_id
        if checkpoint_id:
            configurable["checkpoint_id"] = checkpoint_id

@ -1871,10 +2073,26 @@ async def stream_resume_chat(
    user_id: str | None = None,
    llm_config_id: int = -1,
    thread_visibility: ChatVisibility | None = None,
+    filesystem_selection: FilesystemSelection | None = None,
+    request_id: str | None = None,
 ) -> AsyncGenerator[str, None]:
    streaming_service = VercelStreamingService()
    stream_result = StreamResult()
    _t_total = time.perf_counter()
+    fs_mode = filesystem_selection.mode.value if filesystem_selection else "cloud"
+    fs_platform = (
+        filesystem_selection.client_platform.value if filesystem_selection else "web"
+    )
+    stream_result.request_id = request_id
+    stream_result.turn_id = f"{chat_id}:{int(time.time() * 1000)}"
+    stream_result.filesystem_mode = fs_mode
+    stream_result.client_platform = fs_platform
+    _log_file_contract("turn_start", stream_result)
+    _perf_log.info(
+        "[stream_resume] filesystem_mode=%s client_platform=%s",
+        fs_mode,
+        fs_platform,
+    )

    from app.services.token_tracking_service import start_turn

@ -1991,6 +2209,7 @@ async def stream_resume_chat(
            agent_config=agent_config,
            firecrawl_api_key=firecrawl_api_key,
            thread_visibility=visibility,
+            filesystem_selection=filesystem_selection,
        )
        _perf_log.info(
            "[stream_resume] Agent created in %.3fs", time.perf_counter() - _t0
@ -2009,7 +2228,11 @@ async def stream_resume_chat(
        from langgraph.types import Command

        config = {
-            "configurable": {"thread_id": str(chat_id)},
+            "configurable": {
+                "thread_id": str(chat_id),
+                "request_id": request_id or "unknown",
+                "turn_id": stream_result.turn_id,
+            },
            "recursion_limit": 80,
        }

--- a/surfsense_backend/app/tasks/connector_indexers/init.py
+++ b/surfsense_backend/app/tasks/connector_indexers/init.py
@ -1,75 +1,29 @@
 """
 Connector indexers module for background tasks.

-This module provides a collection of connector indexers for different platforms
-and services. Each indexer is responsible for handling the indexing of content
-from a specific connector type.
-
-Available indexers:
- Slack: Index messages from Slack channels
- Notion: Index pages from Notion workspaces
- GitHub: Index repositories and files from GitHub
- Linear: Index issues from Linear workspaces
- Jira: Index issues from Jira projects
- Confluence: Index pages from Confluence spaces
- BookStack: Index pages from BookStack wiki instances
- Discord: Index messages from Discord servers
- ClickUp: Index tasks from ClickUp workspaces
- Google Gmail: Index messages from Google Gmail
- Google Calendar: Index events from Google Calendar
- Luma: Index events from Luma
- Webcrawler: Index crawled URLs
- Elasticsearch: Index documents from Elasticsearch instances
+Each indexer handles content indexing from a specific connector type.
+Live connectors (Slack, Linear, Jira, ClickUp, Airtable, Discord, Teams,
+Luma) now use real-time agent tools instead of background indexing.
 """

-# Communication platforms
-# Calendar and scheduling
-from .airtable_indexer import index_airtable_records
 from .bookstack_indexer import index_bookstack_pages
-
-# Note: composio_indexer is imported directly in connector_tasks.py to avoid circular imports
-from .clickup_indexer import index_clickup_tasks
 from .confluence_indexer import index_confluence_pages
-from .discord_indexer import index_discord_messages
-
-# Development platforms
 from .elasticsearch_indexer import index_elasticsearch_documents
 from .github_indexer import index_github_repos
 from .google_calendar_indexer import index_google_calendar_events
 from .google_drive_indexer import index_google_drive_files
 from .google_gmail_indexer import index_google_gmail_messages
-from .jira_indexer import index_jira_issues
-
-# Issue tracking and project management
-from .linear_indexer import index_linear_issues
-
-# Documentation and knowledge management
-from .luma_indexer import index_luma_events
 from .notion_indexer import index_notion_pages
-from .slack_indexer import index_slack_messages
 from .webcrawler_indexer import index_crawled_urls

-__all__ = [  # noqa: RUF022
-    "index_airtable_records",
+__all__ = [
    "index_bookstack_pages",
-    # "index_composio_connector",  # Imported directly in connector_tasks.py to avoid circular imports
-    "index_clickup_tasks",
    "index_confluence_pages",
-    "index_discord_messages",
-    # Development platforms
+    "index_crawled_urls",
    "index_elasticsearch_documents",
    "index_github_repos",
-    # Calendar and scheduling
    "index_google_calendar_events",
    "index_google_drive_files",
-    "index_luma_events",
-    "index_jira_issues",
-    # Issue tracking and project management
-    "index_linear_issues",
-    # Documentation and knowledge management
-    "index_notion_pages",
-    "index_crawled_urls",
-    # Communication platforms
-    "index_slack_messages",
    "index_google_gmail_messages",
+    "index_notion_pages",
 ]