feat: add configurable summary calculation and various improvements

- Replaced direct embedding calls with a utility function across various components to streamline embedding logic.
- Added enable_summary flag to several models and routes to control summary generation behavior.
This commit is contained in:
DESKTOP-RTLN3BA\$punk 2026-02-26 18:24:57 -08:00
parent dc33a4a68f
commit e9892c8fe9
50 changed files with 380 additions and 298 deletions

View file

@ -0,0 +1,46 @@
"""102_add_enable_summary_to_connectors
Revision ID: 102
Revises: 101
Create Date: 2026-02-26
Adds enable_summary boolean column to search_source_connectors.
Defaults to False for all existing and new connectors so LLM-based
summary generation is opt-in.
"""
from __future__ import annotations
from collections.abc import Sequence
import sqlalchemy as sa
from alembic import op
# revision identifiers, used by Alembic.
revision: str = "102"  # unique id of this migration script
down_revision: str | None = "101"  # migration this one applies on top of
branch_labels: str | Sequence[str] | None = None  # no branch labels
depends_on: str | Sequence[str] | None = None  # no cross-branch dependencies
def upgrade() -> None:
    """Add the ``enable_summary`` column to ``search_source_connectors``.

    The column is a non-nullable boolean with a server-side default of
    ``false``, so every existing and new connector starts with summary
    generation disabled (opt-in). The migration is idempotent: it inspects
    the live schema first and skips the DDL if the column already exists.
    """
    bind = op.get_bind()
    inspector = sa.inspect(bind)
    column_names = {
        column["name"]
        for column in inspector.get_columns("search_source_connectors")
    }
    # Guard clause: nothing to do if a previous run already added the column.
    if "enable_summary" in column_names:
        return
    op.add_column(
        "search_source_connectors",
        sa.Column(
            "enable_summary",
            sa.Boolean(),
            nullable=False,
            server_default=sa.text("false"),
        ),
    )
def downgrade() -> None:
    """Drop the ``enable_summary`` column added by this revision."""
    op.drop_column("search_source_connectors", "enable_summary")

View file

@ -14,8 +14,8 @@ from langchain_core.tools import tool
from sqlalchemy import select from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.ext.asyncio import AsyncSession
from app.config import config
from app.db import SurfsenseDocsChunk, SurfsenseDocsDocument from app.db import SurfsenseDocsChunk, SurfsenseDocsDocument
from app.utils.document_converters import embed_text
def format_surfsense_docs_results(results: list[tuple]) -> str: def format_surfsense_docs_results(results: list[tuple]) -> str:
@ -100,7 +100,7 @@ async def search_surfsense_docs_async(
Formatted string with relevant documentation content Formatted string with relevant documentation content
""" """
# Get embedding for the query # Get embedding for the query
query_embedding = config.embedding_model_instance.embed(query) query_embedding = embed_text(query)
# Vector similarity search on chunks, joining with documents # Vector similarity search on chunks, joining with documents
stmt = ( stmt = (

View file

@ -8,8 +8,8 @@ from langchain_core.tools import tool
from sqlalchemy import select from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.ext.asyncio import AsyncSession
from app.config import config
from app.db import MemoryCategory, SharedMemory, User from app.db import MemoryCategory, SharedMemory, User
from app.utils.document_converters import embed_text
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -64,7 +64,7 @@ async def save_shared_memory(
count = await get_shared_memory_count(db_session, search_space_id) count = await get_shared_memory_count(db_session, search_space_id)
if count >= MAX_MEMORIES_PER_SEARCH_SPACE: if count >= MAX_MEMORIES_PER_SEARCH_SPACE:
await delete_oldest_shared_memory(db_session, search_space_id) await delete_oldest_shared_memory(db_session, search_space_id)
embedding = config.embedding_model_instance.embed(content) embedding = embed_text(content)
row = SharedMemory( row = SharedMemory(
search_space_id=search_space_id, search_space_id=search_space_id,
created_by_id=_to_uuid(created_by_id), created_by_id=_to_uuid(created_by_id),
@ -108,7 +108,7 @@ async def recall_shared_memory(
if category and category in valid_categories: if category and category in valid_categories:
stmt = stmt.where(SharedMemory.category == MemoryCategory(category)) stmt = stmt.where(SharedMemory.category == MemoryCategory(category))
if query: if query:
query_embedding = config.embedding_model_instance.embed(query) query_embedding = embed_text(query)
stmt = stmt.order_by( stmt = stmt.order_by(
SharedMemory.embedding.op("<=>")(query_embedding) SharedMemory.embedding.op("<=>")(query_embedding)
).limit(top_k) ).limit(top_k)

View file

@ -17,8 +17,8 @@ from langchain_core.tools import tool
from sqlalchemy import select from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.ext.asyncio import AsyncSession
from app.config import config
from app.db import MemoryCategory, UserMemory from app.db import MemoryCategory, UserMemory
from app.utils.document_converters import embed_text
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -178,7 +178,7 @@ def create_save_memory_tool(
await delete_oldest_memory(db_session, user_id, search_space_id) await delete_oldest_memory(db_session, user_id, search_space_id)
# Generate embedding for the memory # Generate embedding for the memory
embedding = config.embedding_model_instance.embed(content) embedding = embed_text(content)
# Create new memory using ORM # Create new memory using ORM
# The pgvector Vector column type handles embedding conversion automatically # The pgvector Vector column type handles embedding conversion automatically
@ -268,7 +268,7 @@ def create_recall_memory_tool(
if query: if query:
# Semantic search using embeddings # Semantic search using embeddings
query_embedding = config.embedding_model_instance.embed(query) query_embedding = embed_text(query)
# Build query with vector similarity # Build query with vector similarity
stmt = ( stmt = (

View file

@ -14,7 +14,6 @@ from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.future import select from sqlalchemy.future import select
from sqlalchemy.orm import selectinload from sqlalchemy.orm import selectinload
from app.config import config
from app.connectors.composio_connector import ComposioConnector from app.connectors.composio_connector import ComposioConnector
from app.db import Document, DocumentStatus, DocumentType from app.db import Document, DocumentStatus, DocumentType
from app.services.composio_service import TOOLKIT_TO_DOCUMENT_TYPE from app.services.composio_service import TOOLKIT_TO_DOCUMENT_TYPE
@ -27,6 +26,7 @@ from app.tasks.connector_indexers.base import (
) )
from app.utils.document_converters import ( from app.utils.document_converters import (
create_document_chunks, create_document_chunks,
embed_text,
generate_content_hash, generate_content_hash,
generate_document_summary, generate_document_summary,
generate_unique_identifier_hash, generate_unique_identifier_hash,
@ -383,6 +383,7 @@ async def _process_gmail_messages_phase2(
connector_id: int, connector_id: int,
search_space_id: int, search_space_id: int,
user_id: str, user_id: str,
enable_summary: bool = False,
on_heartbeat_callback: HeartbeatCallbackType | None = None, on_heartbeat_callback: HeartbeatCallbackType | None = None,
) -> tuple[int, int]: ) -> tuple[int, int]:
""" """
@ -415,7 +416,7 @@ async def _process_gmail_messages_phase2(
session, user_id, search_space_id session, user_id, search_space_id
) )
if user_llm: if user_llm and enable_summary:
document_metadata_for_summary = { document_metadata_for_summary = {
"message_id": item["message_id"], "message_id": item["message_id"],
"thread_id": item["thread_id"], "thread_id": item["thread_id"],
@ -427,10 +428,8 @@ async def _process_gmail_messages_phase2(
item["markdown_content"], user_llm, document_metadata_for_summary item["markdown_content"], user_llm, document_metadata_for_summary
) )
else: else:
summary_content = f"Gmail: {item['subject']}\n\nFrom: {item['sender']}\nDate: {item['date_str']}" summary_content = f"Gmail: {item['subject']}\n\nFrom: {item['sender']}\nDate: {item['date_str']}\n\n{item['markdown_content']}"
summary_embedding = config.embedding_model_instance.embed( summary_embedding = embed_text(summary_content)
summary_content
)
chunks = await create_document_chunks(item["markdown_content"]) chunks = await create_document_chunks(item["markdown_content"])
@ -646,6 +645,7 @@ async def index_composio_gmail(
connector_id=connector_id, connector_id=connector_id,
search_space_id=search_space_id, search_space_id=search_space_id,
user_id=user_id, user_id=user_id,
enable_summary=getattr(connector, "enable_summary", False),
on_heartbeat_callback=on_heartbeat_callback, on_heartbeat_callback=on_heartbeat_callback,
) )

View file

@ -14,7 +14,6 @@ from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.future import select from sqlalchemy.future import select
from sqlalchemy.orm import selectinload from sqlalchemy.orm import selectinload
from app.config import config
from app.connectors.composio_connector import ComposioConnector from app.connectors.composio_connector import ComposioConnector
from app.db import Document, DocumentStatus, DocumentType from app.db import Document, DocumentStatus, DocumentType
from app.services.composio_service import TOOLKIT_TO_DOCUMENT_TYPE from app.services.composio_service import TOOLKIT_TO_DOCUMENT_TYPE
@ -27,6 +26,7 @@ from app.tasks.connector_indexers.base import (
) )
from app.utils.document_converters import ( from app.utils.document_converters import (
create_document_chunks, create_document_chunks,
embed_text,
generate_content_hash, generate_content_hash,
generate_document_summary, generate_document_summary,
generate_unique_identifier_hash, generate_unique_identifier_hash,
@ -440,7 +440,7 @@ async def index_composio_google_calendar(
session, user_id, search_space_id session, user_id, search_space_id
) )
if user_llm: if user_llm and connector.enable_summary:
document_metadata_for_summary = { document_metadata_for_summary = {
"event_id": item["event_id"], "event_id": item["event_id"],
"summary": item["summary"], "summary": item["summary"],
@ -456,12 +456,10 @@ async def index_composio_google_calendar(
document_metadata_for_summary, document_metadata_for_summary,
) )
else: else:
summary_content = f"Calendar: {item['summary']}\n\nStart: {item['start_time']}\nEnd: {item['end_time']}" summary_content = (
if item["location"]: f"Calendar: {item['summary']}\n\n{item['markdown_content']}"
summary_content += f"\nLocation: {item['location']}"
summary_embedding = config.embedding_model_instance.embed(
summary_content
) )
summary_embedding = embed_text(summary_content)
chunks = await create_document_chunks(item["markdown_content"]) chunks = await create_document_chunks(item["markdown_content"])

View file

@ -31,6 +31,7 @@ from app.tasks.connector_indexers.base import (
) )
from app.utils.document_converters import ( from app.utils.document_converters import (
create_document_chunks, create_document_chunks,
embed_text,
generate_content_hash, generate_content_hash,
generate_document_summary, generate_document_summary,
generate_unique_identifier_hash, generate_unique_identifier_hash,
@ -714,6 +715,7 @@ async def index_composio_google_drive(
max_items=max_items, max_items=max_items,
task_logger=task_logger, task_logger=task_logger,
log_entry=log_entry, log_entry=log_entry,
enable_summary=getattr(connector, "enable_summary", False),
on_heartbeat_callback=on_heartbeat_callback, on_heartbeat_callback=on_heartbeat_callback,
) )
else: else:
@ -747,6 +749,7 @@ async def index_composio_google_drive(
max_items=max_items, max_items=max_items,
task_logger=task_logger, task_logger=task_logger,
log_entry=log_entry, log_entry=log_entry,
enable_summary=getattr(connector, "enable_summary", False),
on_heartbeat_callback=on_heartbeat_callback, on_heartbeat_callback=on_heartbeat_callback,
) )
@ -829,6 +832,7 @@ async def _index_composio_drive_delta_sync(
max_items: int, max_items: int,
task_logger: TaskLoggingService, task_logger: TaskLoggingService,
log_entry, log_entry,
enable_summary: bool = False,
on_heartbeat_callback: HeartbeatCallbackType | None = None, on_heartbeat_callback: HeartbeatCallbackType | None = None,
) -> tuple[int, int, list[str]]: ) -> tuple[int, int, list[str]]:
"""Index Google Drive files using delta sync with real-time document status updates. """Index Google Drive files using delta sync with real-time document status updates.
@ -1079,7 +1083,7 @@ async def _index_composio_drive_delta_sync(
session, user_id, search_space_id session, user_id, search_space_id
) )
if user_llm: if user_llm and enable_summary:
document_metadata_for_summary = { document_metadata_for_summary = {
"file_id": item["file_id"], "file_id": item["file_id"],
"file_name": item["file_name"], "file_name": item["file_name"],
@ -1090,10 +1094,8 @@ async def _index_composio_drive_delta_sync(
markdown_content, user_llm, document_metadata_for_summary markdown_content, user_llm, document_metadata_for_summary
) )
else: else:
summary_content = f"Google Drive File: {item['file_name']}\n\nType: {item['mime_type']}" summary_content = f"Google Drive File: {item['file_name']}\n\nType: {item['mime_type']}\n\n{markdown_content}"
summary_embedding = config.embedding_model_instance.embed( summary_embedding = embed_text(summary_content)
summary_content
)
chunks = await create_document_chunks(markdown_content) chunks = await create_document_chunks(markdown_content)
@ -1155,6 +1157,7 @@ async def _index_composio_drive_full_scan(
max_items: int, max_items: int,
task_logger: TaskLoggingService, task_logger: TaskLoggingService,
log_entry, log_entry,
enable_summary: bool = False,
on_heartbeat_callback: HeartbeatCallbackType | None = None, on_heartbeat_callback: HeartbeatCallbackType | None = None,
) -> tuple[int, int, list[str]]: ) -> tuple[int, int, list[str]]:
"""Index Google Drive files using full scan with real-time document status updates.""" """Index Google Drive files using full scan with real-time document status updates."""
@ -1488,7 +1491,7 @@ async def _index_composio_drive_full_scan(
session, user_id, search_space_id session, user_id, search_space_id
) )
if user_llm: if user_llm and enable_summary:
document_metadata_for_summary = { document_metadata_for_summary = {
"file_id": item["file_id"], "file_id": item["file_id"],
"file_name": item["file_name"], "file_name": item["file_name"],
@ -1499,10 +1502,8 @@ async def _index_composio_drive_full_scan(
markdown_content, user_llm, document_metadata_for_summary markdown_content, user_llm, document_metadata_for_summary
) )
else: else:
summary_content = f"Google Drive File: {item['file_name']}\n\nType: {item['mime_type']}" summary_content = f"Google Drive File: {item['file_name']}\n\nType: {item['mime_type']}\n\n{markdown_content}"
summary_embedding = config.embedding_model_instance.embed( summary_embedding = embed_text(summary_content)
summary_content
)
chunks = await create_document_chunks(markdown_content) chunks = await create_document_chunks(markdown_content)

View file

@ -1,6 +1,6 @@
from collections.abc import AsyncGenerator from collections.abc import AsyncGenerator
from datetime import UTC, datetime from datetime import UTC, datetime
from enum import Enum from enum import StrEnum
from fastapi import Depends from fastapi import Depends
from fastapi_users.db import SQLAlchemyBaseUserTableUUID, SQLAlchemyUserDatabase from fastapi_users.db import SQLAlchemyBaseUserTableUUID, SQLAlchemyUserDatabase
@ -31,7 +31,7 @@ if config.AUTH_TYPE == "GOOGLE":
DATABASE_URL = config.DATABASE_URL DATABASE_URL = config.DATABASE_URL
class DocumentType(str, Enum): class DocumentType(StrEnum):
EXTENSION = "EXTENSION" EXTENSION = "EXTENSION"
CRAWLED_URL = "CRAWLED_URL" CRAWLED_URL = "CRAWLED_URL"
FILE = "FILE" FILE = "FILE"
@ -60,7 +60,7 @@ class DocumentType(str, Enum):
COMPOSIO_GOOGLE_CALENDAR_CONNECTOR = "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR" COMPOSIO_GOOGLE_CALENDAR_CONNECTOR = "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR"
class SearchSourceConnectorType(str, Enum): class SearchSourceConnectorType(StrEnum):
SERPER_API = "SERPER_API" # NOT IMPLEMENTED YET : DON'T REMEMBER WHY : MOST PROBABLY BECAUSE WE NEED TO CRAWL THE RESULTS RETURNED BY IT SERPER_API = "SERPER_API" # NOT IMPLEMENTED YET : DON'T REMEMBER WHY : MOST PROBABLY BECAUSE WE NEED TO CRAWL THE RESULTS RETURNED BY IT
TAVILY_API = "TAVILY_API" TAVILY_API = "TAVILY_API"
SEARXNG_API = "SEARXNG_API" SEARXNG_API = "SEARXNG_API"
@ -93,7 +93,7 @@ class SearchSourceConnectorType(str, Enum):
COMPOSIO_GOOGLE_CALENDAR_CONNECTOR = "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR" COMPOSIO_GOOGLE_CALENDAR_CONNECTOR = "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR"
class PodcastStatus(str, Enum): class PodcastStatus(StrEnum):
PENDING = "pending" PENDING = "pending"
GENERATING = "generating" GENERATING = "generating"
READY = "ready" READY = "ready"
@ -177,7 +177,7 @@ class DocumentStatus:
return None return None
class LiteLLMProvider(str, Enum): class LiteLLMProvider(StrEnum):
""" """
Enum for LLM providers supported by LiteLLM. Enum for LLM providers supported by LiteLLM.
""" """
@ -215,7 +215,7 @@ class LiteLLMProvider(str, Enum):
CUSTOM = "CUSTOM" CUSTOM = "CUSTOM"
class ImageGenProvider(str, Enum): class ImageGenProvider(StrEnum):
""" """
Enum for image generation providers supported by LiteLLM. Enum for image generation providers supported by LiteLLM.
This is a subset of LLM providers only those that support image generation. This is a subset of LLM providers only those that support image generation.
@ -233,7 +233,7 @@ class ImageGenProvider(str, Enum):
NSCALE = "NSCALE" NSCALE = "NSCALE"
class LogLevel(str, Enum): class LogLevel(StrEnum):
DEBUG = "DEBUG" DEBUG = "DEBUG"
INFO = "INFO" INFO = "INFO"
WARNING = "WARNING" WARNING = "WARNING"
@ -241,13 +241,13 @@ class LogLevel(str, Enum):
CRITICAL = "CRITICAL" CRITICAL = "CRITICAL"
class LogStatus(str, Enum): class LogStatus(StrEnum):
IN_PROGRESS = "IN_PROGRESS" IN_PROGRESS = "IN_PROGRESS"
SUCCESS = "SUCCESS" SUCCESS = "SUCCESS"
FAILED = "FAILED" FAILED = "FAILED"
class IncentiveTaskType(str, Enum): class IncentiveTaskType(StrEnum):
""" """
Enum for incentive task types that users can complete to earn free pages. Enum for incentive task types that users can complete to earn free pages.
Each task can only be completed once per user. Each task can only be completed once per user.
@ -298,7 +298,7 @@ INCENTIVE_TASKS_CONFIG = {
} }
class Permission(str, Enum): class Permission(StrEnum):
""" """
Granular permissions for search space resources. Granular permissions for search space resources.
Use '*' (FULL_ACCESS) to grant all permissions. Use '*' (FULL_ACCESS) to grant all permissions.
@ -471,7 +471,7 @@ class BaseModel(Base):
id = Column(Integer, primary_key=True, index=True) id = Column(Integer, primary_key=True, index=True)
class NewChatMessageRole(str, Enum): class NewChatMessageRole(StrEnum):
"""Role enum for new chat messages.""" """Role enum for new chat messages."""
USER = "user" USER = "user"
@ -479,7 +479,7 @@ class NewChatMessageRole(str, Enum):
SYSTEM = "system" SYSTEM = "system"
class ChatVisibility(str, Enum): class ChatVisibility(StrEnum):
""" """
Visibility/sharing level for chat threads. Visibility/sharing level for chat threads.
@ -788,7 +788,7 @@ class ChatSessionState(BaseModel):
ai_responding_to_user = relationship("User") ai_responding_to_user = relationship("User")
class MemoryCategory(str, Enum): class MemoryCategory(StrEnum):
"""Categories for user memories.""" """Categories for user memories."""
# Using lowercase keys to match PostgreSQL enum values # Using lowercase keys to match PostgreSQL enum values
@ -1317,6 +1317,12 @@ class SearchSourceConnector(BaseModel, TimestampMixin):
last_indexed_at = Column(TIMESTAMP(timezone=True), nullable=True) last_indexed_at = Column(TIMESTAMP(timezone=True), nullable=True)
config = Column(JSON, nullable=False) config = Column(JSON, nullable=False)
# Summary generation (LLM-based) - disabled by default to save resources.
# When enabled, improves hybrid search quality at the cost of LLM calls.
enable_summary = Column(
Boolean, nullable=False, default=False, server_default="false"
)
# Periodic indexing fields # Periodic indexing fields
periodic_indexing_enabled = Column(Boolean, nullable=False, default=False) periodic_indexing_enabled = Column(Boolean, nullable=False, default=False)
indexing_frequency_minutes = Column(Integer, nullable=True) indexing_frequency_minutes = Column(Integer, nullable=True)

View file

@ -13,6 +13,7 @@ async def index_uploaded_file(
user_id: str, user_id: str,
session: AsyncSession, session: AsyncSession,
llm, llm,
should_summarize: bool = False,
) -> None: ) -> None:
connector_doc = ConnectorDocument( connector_doc = ConnectorDocument(
title=filename, title=filename,
@ -22,7 +23,7 @@ async def index_uploaded_file(
search_space_id=search_space_id, search_space_id=search_space_id,
created_by_id=user_id, created_by_id=user_id,
connector_id=None, connector_id=None,
should_summarize=True, should_summarize=should_summarize,
should_use_code_chunker=False, should_use_code_chunker=False,
fallback_summary=markdown_content[:4000], fallback_summary=markdown_content[:4000],
metadata={ metadata={

View file

@ -1,6 +1,3 @@
from app.config import config from app.utils.document_converters import embed_text
__all__ = ["embed_text"]
def embed_text(text: str) -> list[float]:
"""Embed a single text string using the configured embedding model."""
return config.embedding_model_instance.embed(text)

View file

@ -118,6 +118,7 @@ async def create_documents(
async def create_documents_file_upload( async def create_documents_file_upload(
files: list[UploadFile], files: list[UploadFile],
search_space_id: int = Form(...), search_space_id: int = Form(...),
should_summarize: bool = Form(False),
session: AsyncSession = Depends(get_async_session), session: AsyncSession = Depends(get_async_session),
user: User = Depends(current_active_user), user: User = Depends(current_active_user),
): ):
@ -303,6 +304,7 @@ async def create_documents_file_upload(
filename=filename, filename=filename,
search_space_id=search_space_id, search_space_id=search_space_id,
user_id=str(user.id), user_id=str(user.id),
should_summarize=should_summarize,
) )
return { return {

View file

@ -17,7 +17,7 @@ import logging
import os import os
import re import re
import tempfile import tempfile
from enum import Enum from enum import StrEnum
import pypandoc import pypandoc
import typst import typst
@ -46,7 +46,7 @@ router = APIRouter()
MAX_REPORT_LIST_LIMIT = 500 MAX_REPORT_LIST_LIMIT = 500
class ExportFormat(str, Enum): class ExportFormat(StrEnum):
PDF = "pdf" PDF = "pdf"
DOCX = "docx" DOCX = "docx"

View file

@ -1,13 +1,13 @@
"""Podcast schemas for API responses.""" """Podcast schemas for API responses."""
from datetime import datetime from datetime import datetime
from enum import Enum from enum import StrEnum
from typing import Any from typing import Any
from pydantic import BaseModel from pydantic import BaseModel
class PodcastStatusEnum(str, Enum): class PodcastStatusEnum(StrEnum):
PENDING = "pending" PENDING = "pending"
GENERATING = "generating" GENERATING = "generating"
READY = "ready" READY = "ready"

View file

@ -16,6 +16,7 @@ class SearchSourceConnectorBase(BaseModel):
is_indexable: bool is_indexable: bool
last_indexed_at: datetime | None = None last_indexed_at: datetime | None = None
config: dict[str, Any] config: dict[str, Any]
enable_summary: bool = False
periodic_indexing_enabled: bool = False periodic_indexing_enabled: bool = False
indexing_frequency_minutes: int | None = None indexing_frequency_minutes: int | None = None
next_scheduled_at: datetime | None = None next_scheduled_at: datetime | None = None
@ -65,6 +66,7 @@ class SearchSourceConnectorUpdate(BaseModel):
is_indexable: bool | None = None is_indexable: bool | None = None
last_indexed_at: datetime | None = None last_indexed_at: datetime | None = None
config: dict[str, Any] | None = None config: dict[str, Any] | None = None
enable_summary: bool | None = None
periodic_indexing_enabled: bool | None = None periodic_indexing_enabled: bool | None = None
indexing_frequency_minutes: int | None = None indexing_frequency_minutes: int | None = None
next_scheduled_at: datetime | None = None next_scheduled_at: datetime | None = None

View file

@ -4,12 +4,12 @@ from datetime import datetime
from sqlalchemy import delete from sqlalchemy import delete
from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.ext.asyncio import AsyncSession
from app.config import config
from app.connectors.linear_connector import LinearConnector from app.connectors.linear_connector import LinearConnector
from app.db import Chunk, Document from app.db import Chunk, Document
from app.services.llm_service import get_user_long_context_llm from app.services.llm_service import get_user_long_context_llm
from app.utils.document_converters import ( from app.utils.document_converters import (
create_document_chunks, create_document_chunks,
embed_text,
generate_content_hash, generate_content_hash,
generate_document_summary, generate_document_summary,
) )
@ -80,7 +80,7 @@ class LinearKBSyncService:
state = formatted_issue.get("state", "Unknown") state = formatted_issue.get("state", "Unknown")
priority = issue_raw.get("priorityLabel", "Unknown") priority = issue_raw.get("priorityLabel", "Unknown")
comment_count = len(formatted_issue.get("comments", [])) comment_count = len(formatted_issue.get("comments", []))
description = formatted_issue.get("description", "") formatted_issue.get("description", "")
user_llm = await get_user_long_context_llm( user_llm = await get_user_long_context_llm(
self.db_session, user_id, search_space_id, disable_streaming=True self.db_session, user_id, search_space_id, disable_streaming=True
@ -100,18 +100,10 @@ class LinearKBSyncService:
issue_content, user_llm, document_metadata_for_summary issue_content, user_llm, document_metadata_for_summary
) )
else: else:
if description and len(description) > 1000:
description = description[:997] + "..."
summary_content = ( summary_content = (
f"Linear Issue {issue_identifier}: {issue_title}\n\n" f"Linear Issue {issue_identifier}: {issue_title}\n\n{issue_content}"
f"Status: {state}\n\n"
)
if description:
summary_content += f"Description: {description}\n\n"
summary_content += f"Comments: {comment_count}"
summary_embedding = config.embedding_model_instance.embed(
summary_content
) )
summary_embedding = embed_text(summary_content)
await self.db_session.execute( await self.db_session.execute(
delete(Chunk).where(Chunk.document_id == document.id) delete(Chunk).where(Chunk.document_id == document.id)

View file

@ -15,10 +15,12 @@ import logging
from typing import Any from typing import Any
from langchain_core.callbacks import CallbackManagerForLLMRun from langchain_core.callbacks import CallbackManagerForLLMRun
from langchain_core.exceptions import ContextOverflowError
from langchain_core.language_models import BaseChatModel from langchain_core.language_models import BaseChatModel
from langchain_core.messages import AIMessage, AIMessageChunk, BaseMessage from langchain_core.messages import AIMessage, AIMessageChunk, BaseMessage
from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult
from litellm import Router from litellm import Router
from litellm.exceptions import ContextWindowExceededError
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -359,13 +361,15 @@ class ChatLiteLLMRouter(BaseChatModel):
if self._tool_choice is not None: if self._tool_choice is not None:
call_kwargs["tool_choice"] = self._tool_choice call_kwargs["tool_choice"] = self._tool_choice
# Call router completion try:
response = self._router.completion( response = self._router.completion(
model=self.model, model=self.model,
messages=formatted_messages, messages=formatted_messages,
stop=stop, stop=stop,
**call_kwargs, **call_kwargs,
) )
except ContextWindowExceededError as e:
raise ContextOverflowError(str(e)) from e
# Convert response to ChatResult with potential tool calls # Convert response to ChatResult with potential tool calls
message = self._convert_response_to_message(response.choices[0].message) message = self._convert_response_to_message(response.choices[0].message)
@ -396,13 +400,15 @@ class ChatLiteLLMRouter(BaseChatModel):
if self._tool_choice is not None: if self._tool_choice is not None:
call_kwargs["tool_choice"] = self._tool_choice call_kwargs["tool_choice"] = self._tool_choice
# Call router async completion try:
response = await self._router.acompletion( response = await self._router.acompletion(
model=self.model, model=self.model,
messages=formatted_messages, messages=formatted_messages,
stop=stop, stop=stop,
**call_kwargs, **call_kwargs,
) )
except ContextWindowExceededError as e:
raise ContextOverflowError(str(e)) from e
# Convert response to ChatResult with potential tool calls # Convert response to ChatResult with potential tool calls
message = self._convert_response_to_message(response.choices[0].message) message = self._convert_response_to_message(response.choices[0].message)
@ -432,7 +438,7 @@ class ChatLiteLLMRouter(BaseChatModel):
if self._tool_choice is not None: if self._tool_choice is not None:
call_kwargs["tool_choice"] = self._tool_choice call_kwargs["tool_choice"] = self._tool_choice
# Call router completion with streaming try:
response = self._router.completion( response = self._router.completion(
model=self.model, model=self.model,
messages=formatted_messages, messages=formatted_messages,
@ -440,6 +446,8 @@ class ChatLiteLLMRouter(BaseChatModel):
stream=True, stream=True,
**call_kwargs, **call_kwargs,
) )
except ContextWindowExceededError as e:
raise ContextOverflowError(str(e)) from e
# Yield chunks # Yield chunks
for chunk in response: for chunk in response:
@ -471,7 +479,7 @@ class ChatLiteLLMRouter(BaseChatModel):
if self._tool_choice is not None: if self._tool_choice is not None:
call_kwargs["tool_choice"] = self._tool_choice call_kwargs["tool_choice"] = self._tool_choice
# Call router async completion with streaming try:
response = await self._router.acompletion( response = await self._router.acompletion(
model=self.model, model=self.model,
messages=formatted_messages, messages=formatted_messages,
@ -479,6 +487,8 @@ class ChatLiteLLMRouter(BaseChatModel):
stream=True, stream=True,
**call_kwargs, **call_kwargs,
) )
except ContextWindowExceededError as e:
raise ContextOverflowError(str(e)) from e
# Yield chunks asynchronously # Yield chunks asynchronously
async for chunk in response: async for chunk in response:

View file

@ -4,11 +4,11 @@ from datetime import datetime
from sqlalchemy import delete from sqlalchemy import delete
from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.ext.asyncio import AsyncSession
from app.config import config
from app.db import Chunk, Document from app.db import Chunk, Document
from app.services.llm_service import get_user_long_context_llm from app.services.llm_service import get_user_long_context_llm
from app.utils.document_converters import ( from app.utils.document_converters import (
create_document_chunks, create_document_chunks,
embed_text,
generate_content_hash, generate_content_hash,
generate_document_summary, generate_document_summary,
) )
@ -127,10 +127,8 @@ class NotionKBSyncService:
logger.debug(f"Generated summary length: {len(summary_content)} chars") logger.debug(f"Generated summary length: {len(summary_content)} chars")
else: else:
logger.warning("No LLM configured - using fallback summary") logger.warning("No LLM configured - using fallback summary")
summary_content = f"Notion Page: {document.document_metadata.get('page_title')}\n\n{full_content[:500]}..." summary_content = f"Notion Page: {document.document_metadata.get('page_title')}\n\n{full_content}"
summary_embedding = config.embedding_model_instance.embed( summary_embedding = embed_text(summary_content)
summary_content
)
logger.debug(f"Deleting old chunks for document {document_id}") logger.debug(f"Deleting old chunks for document {document_id}")
await self.db_session.execute( await self.db_session.execute(

View file

@ -626,6 +626,7 @@ def process_file_upload_with_document_task(
filename: str, filename: str,
search_space_id: int, search_space_id: int,
user_id: str, user_id: str,
should_summarize: bool = False,
): ):
""" """
Celery task to process uploaded file with existing pending document. Celery task to process uploaded file with existing pending document.
@ -640,6 +641,7 @@ def process_file_upload_with_document_task(
filename: Original filename filename: Original filename
search_space_id: ID of the search space search_space_id: ID of the search space
user_id: ID of the user user_id: ID of the user
should_summarize: Whether to generate an LLM summary
""" """
import traceback import traceback
@ -674,7 +676,12 @@ def process_file_upload_with_document_task(
try: try:
loop.run_until_complete( loop.run_until_complete(
_process_file_with_document( _process_file_with_document(
document_id, temp_path, filename, search_space_id, user_id document_id,
temp_path,
filename,
search_space_id,
user_id,
should_summarize=should_summarize,
) )
) )
logger.info( logger.info(
@ -710,6 +717,7 @@ async def _process_file_with_document(
filename: str, filename: str,
search_space_id: int, search_space_id: int,
user_id: str, user_id: str,
should_summarize: bool = False,
): ):
""" """
Process file and update existing pending document status. Process file and update existing pending document status.
@ -811,6 +819,7 @@ async def _process_file_with_document(
task_logger=task_logger, task_logger=task_logger,
log_entry=log_entry, log_entry=log_entry,
notification=notification, notification=notification,
should_summarize=should_summarize,
) )
# Update notification on success # Update notification on success

View file

@ -12,13 +12,13 @@ from collections.abc import Awaitable, Callable
from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.ext.asyncio import AsyncSession
from app.config import config
from app.connectors.airtable_history import AirtableHistoryConnector from app.connectors.airtable_history import AirtableHistoryConnector
from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnectorType from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnectorType
from app.services.llm_service import get_user_long_context_llm from app.services.llm_service import get_user_long_context_llm
from app.services.task_logging_service import TaskLoggingService from app.services.task_logging_service import TaskLoggingService
from app.utils.document_converters import ( from app.utils.document_converters import (
create_document_chunks, create_document_chunks,
embed_text,
generate_content_hash, generate_content_hash,
generate_document_summary, generate_document_summary,
generate_unique_identifier_hash, generate_unique_identifier_hash,
@ -399,7 +399,7 @@ async def index_airtable_records(
session, user_id, search_space_id session, user_id, search_space_id
) )
if user_llm: if user_llm and connector.enable_summary:
document_metadata_for_summary = { document_metadata_for_summary = {
"record_id": item["record_id"], "record_id": item["record_id"],
"created_time": item["record"].get("CREATED_TIME()", ""), "created_time": item["record"].get("CREATED_TIME()", ""),
@ -415,11 +415,8 @@ async def index_airtable_records(
document_metadata_for_summary, document_metadata_for_summary,
) )
else: else:
# Fallback to simple summary if no LLM configured summary_content = f"Airtable Record: {item['record_id']}\n\n{item['markdown_content']}"
summary_content = f"Airtable Record: {item['record_id']}\n\n" summary_embedding = embed_text(summary_content)
summary_embedding = config.embedding_model_instance.embed(
summary_content
)
chunks = await create_document_chunks(item["markdown_content"]) chunks = await create_document_chunks(item["markdown_content"])

View file

@ -13,13 +13,13 @@ from datetime import datetime
from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.ext.asyncio import AsyncSession
from app.config import config
from app.connectors.bookstack_connector import BookStackConnector from app.connectors.bookstack_connector import BookStackConnector
from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnectorType from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnectorType
from app.services.llm_service import get_user_long_context_llm from app.services.llm_service import get_user_long_context_llm
from app.services.task_logging_service import TaskLoggingService from app.services.task_logging_service import TaskLoggingService
from app.utils.document_converters import ( from app.utils.document_converters import (
create_document_chunks, create_document_chunks,
embed_text,
generate_content_hash, generate_content_hash,
generate_document_summary, generate_document_summary,
generate_unique_identifier_hash, generate_unique_identifier_hash,
@ -403,7 +403,7 @@ async def index_bookstack_pages(
"connector_id": connector_id, "connector_id": connector_id,
} }
if user_llm: if user_llm and connector.enable_summary:
summary_metadata = { summary_metadata = {
"page_name": item["page_name"], "page_name": item["page_name"],
"page_id": item["page_id"], "page_id": item["page_id"],
@ -418,17 +418,8 @@ async def index_bookstack_pages(
item["full_content"], user_llm, summary_metadata item["full_content"], user_llm, summary_metadata
) )
else: else:
# Fallback to simple summary if no LLM configured summary_content = f"BookStack Page: {item['page_name']}\n\nBook ID: {item['book_id']}\n\n{item['full_content']}"
summary_content = f"BookStack Page: {item['page_name']}\n\nBook ID: {item['book_id']}\n\n" summary_embedding = embed_text(summary_content)
if item["page_content"]:
# Take first 1000 characters of content for summary
content_preview = item["page_content"][:1000]
if len(item["page_content"]) > 1000:
content_preview += "..."
summary_content += f"Content Preview: {content_preview}\n\n"
summary_embedding = config.embedding_model_instance.embed(
summary_content
)
# Process chunks - using the full page content # Process chunks - using the full page content
chunks = await create_document_chunks(item["full_content"]) chunks = await create_document_chunks(item["full_content"])

View file

@ -14,13 +14,13 @@ from datetime import datetime
from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.ext.asyncio import AsyncSession
from app.config import config
from app.connectors.clickup_history import ClickUpHistoryConnector from app.connectors.clickup_history import ClickUpHistoryConnector
from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnectorType from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnectorType
from app.services.llm_service import get_user_long_context_llm from app.services.llm_service import get_user_long_context_llm
from app.services.task_logging_service import TaskLoggingService from app.services.task_logging_service import TaskLoggingService
from app.utils.document_converters import ( from app.utils.document_converters import (
create_document_chunks, create_document_chunks,
embed_text,
generate_content_hash, generate_content_hash,
generate_document_summary, generate_document_summary,
generate_unique_identifier_hash, generate_unique_identifier_hash,
@ -398,7 +398,7 @@ async def index_clickup_tasks(
session, user_id, search_space_id session, user_id, search_space_id
) )
if user_llm: if user_llm and connector.enable_summary:
document_metadata_for_summary = { document_metadata_for_summary = {
"task_id": item["task_id"], "task_id": item["task_id"],
"task_name": item["task_name"], "task_name": item["task_name"],
@ -418,9 +418,7 @@ async def index_clickup_tasks(
) )
else: else:
summary_content = item["task_content"] summary_content = item["task_content"]
summary_embedding = config.embedding_model_instance.embed( summary_embedding = embed_text(item["task_content"])
item["task_content"]
)
chunks = await create_document_chunks(item["task_content"]) chunks = await create_document_chunks(item["task_content"])

View file

@ -14,13 +14,13 @@ from datetime import datetime
from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.ext.asyncio import AsyncSession
from app.config import config
from app.connectors.confluence_history import ConfluenceHistoryConnector from app.connectors.confluence_history import ConfluenceHistoryConnector
from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnectorType from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnectorType
from app.services.llm_service import get_user_long_context_llm from app.services.llm_service import get_user_long_context_llm
from app.services.task_logging_service import TaskLoggingService from app.services.task_logging_service import TaskLoggingService
from app.utils.document_converters import ( from app.utils.document_converters import (
create_document_chunks, create_document_chunks,
embed_text,
generate_content_hash, generate_content_hash,
generate_document_summary, generate_document_summary,
generate_unique_identifier_hash, generate_unique_identifier_hash,
@ -378,7 +378,7 @@ async def index_confluence_pages(
session, user_id, search_space_id session, user_id, search_space_id
) )
if user_llm: if user_llm and connector.enable_summary:
document_metadata = { document_metadata = {
"page_title": item["page_title"], "page_title": item["page_title"],
"page_id": item["page_id"], "page_id": item["page_id"],
@ -394,18 +394,8 @@ async def index_confluence_pages(
item["full_content"], user_llm, document_metadata item["full_content"], user_llm, document_metadata
) )
else: else:
# Fallback to simple summary if no LLM configured summary_content = f"Confluence Page: {item['page_title']}\n\nSpace ID: {item['space_id']}\n\n{item['full_content']}"
summary_content = f"Confluence Page: {item['page_title']}\n\nSpace ID: {item['space_id']}\n\n" summary_embedding = embed_text(summary_content)
if item["page_content"]:
# Take first 1000 characters of content for summary
content_preview = item["page_content"][:1000]
if len(item["page_content"]) > 1000:
content_preview += "..."
summary_content += f"Content Preview: {content_preview}\n\n"
summary_content += f"Comments: {item['comment_count']}"
summary_embedding = config.embedding_model_instance.embed(
summary_content
)
# Process chunks - using the full page content with comments # Process chunks - using the full page content with comments
chunks = await create_document_chunks(item["full_content"]) chunks = await create_document_chunks(item["full_content"])

View file

@ -23,6 +23,7 @@ from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnector
from app.services.task_logging_service import TaskLoggingService from app.services.task_logging_service import TaskLoggingService
from app.utils.document_converters import ( from app.utils.document_converters import (
create_document_chunks, create_document_chunks,
embed_text,
generate_content_hash, generate_content_hash,
generate_unique_identifier_hash, generate_unique_identifier_hash,
) )
@ -669,9 +670,7 @@ async def index_discord_messages(
# Heavy processing (embeddings, chunks) # Heavy processing (embeddings, chunks)
chunks = await create_document_chunks(item["combined_document_string"]) chunks = await create_document_chunks(item["combined_document_string"])
doc_embedding = config.embedding_model_instance.embed( doc_embedding = embed_text(item["combined_document_string"])
item["combined_document_string"]
)
# Update document to READY with actual content # Update document to READY with actual content
document.title = f"{item['guild_name']}#{item['channel_name']}" document.title = f"{item['guild_name']}#{item['channel_name']}"

View file

@ -16,13 +16,13 @@ from datetime import UTC, datetime
from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.ext.asyncio import AsyncSession
from app.config import config
from app.connectors.github_connector import GitHubConnector from app.connectors.github_connector import GitHubConnector
from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnectorType from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnectorType
from app.services.llm_service import get_user_long_context_llm from app.services.llm_service import get_user_long_context_llm
from app.services.task_logging_service import TaskLoggingService from app.services.task_logging_service import TaskLoggingService
from app.utils.document_converters import ( from app.utils.document_converters import (
create_document_chunks, create_document_chunks,
embed_text,
generate_content_hash, generate_content_hash,
generate_document_summary, generate_document_summary,
generate_unique_identifier_hash, generate_unique_identifier_hash,
@ -367,7 +367,7 @@ async def index_github_repos(
"estimated_tokens": digest.estimated_tokens, "estimated_tokens": digest.estimated_tokens,
} }
if user_llm: if user_llm and connector.enable_summary:
# Prepare content for summarization # Prepare content for summarization
summary_content = digest.full_digest summary_content = digest.full_digest
if len(summary_content) > MAX_DIGEST_CHARS: if len(summary_content) > MAX_DIGEST_CHARS:
@ -381,15 +381,12 @@ async def index_github_repos(
summary_content, user_llm, document_metadata_for_summary summary_content, user_llm, document_metadata_for_summary
) )
else: else:
# Fallback to simple summary if no LLM configured
summary_text = ( summary_text = (
f"# GitHub Repository: {repo_full_name}\n\n" f"# GitHub Repository: {repo_full_name}\n\n"
f"## Summary\n{digest.summary}\n\n" f"## Summary\n{digest.summary}\n\n"
f"## File Structure\n{digest.tree[:3000]}" f"## File Structure\n{digest.tree}"
)
summary_embedding = config.embedding_model_instance.embed(
summary_text
) )
summary_embedding = embed_text(summary_text)
# Chunk the full digest content for granular search # Chunk the full digest content for granular search
try: try:
@ -551,7 +548,7 @@ async def _simple_chunk_content(content: str, chunk_size: int = 4000) -> list:
chunks.append( chunks.append(
Chunk( Chunk(
content=chunk_text, content=chunk_text,
embedding=config.embedding_model_instance.embed(chunk_text), embedding=embed_text(chunk_text),
) )
) )

View file

@ -20,6 +20,7 @@ from app.services.llm_service import get_user_long_context_llm
from app.services.task_logging_service import TaskLoggingService from app.services.task_logging_service import TaskLoggingService
from app.utils.document_converters import ( from app.utils.document_converters import (
create_document_chunks, create_document_chunks,
embed_text,
generate_content_hash, generate_content_hash,
generate_document_summary, generate_document_summary,
generate_unique_identifier_hash, generate_unique_identifier_hash,
@ -489,7 +490,7 @@ async def index_google_calendar_events(
session, user_id, search_space_id session, user_id, search_space_id
) )
if user_llm: if user_llm and connector.enable_summary:
document_metadata_for_summary = { document_metadata_for_summary = {
"event_id": item["event_id"], "event_id": item["event_id"],
"event_summary": item["event_summary"], "event_summary": item["event_summary"],
@ -507,22 +508,8 @@ async def index_google_calendar_events(
item["event_markdown"], user_llm, document_metadata_for_summary item["event_markdown"], user_llm, document_metadata_for_summary
) )
else: else:
summary_content = ( summary_content = f"Google Calendar Event: {item['event_summary']}\n\n{item['event_markdown']}"
f"Google Calendar Event: {item['event_summary']}\n\n" summary_embedding = embed_text(summary_content)
)
summary_content += f"Calendar: {item['calendar_id']}\n"
summary_content += f"Start: {item['start_time']}\n"
summary_content += f"End: {item['end_time']}\n"
if item["location"]:
summary_content += f"Location: {item['location']}\n"
if item["description"]:
desc_preview = item["description"][:1000]
if len(item["description"]) > 1000:
desc_preview += "..."
summary_content += f"Description: {desc_preview}\n"
summary_embedding = config.embedding_model_instance.embed(
summary_content
)
chunks = await create_document_chunks(item["event_markdown"]) chunks = await create_document_chunks(item["event_markdown"])

View file

@ -352,7 +352,7 @@ async def index_google_drive_single_file(
await session.commit() await session.commit()
# Process the file # Process the file
indexed, skipped, failed = await _process_single_file( indexed, _skipped, failed = await _process_single_file(
drive_client=drive_client, drive_client=drive_client,
session=session, session=session,
file=file, file=file,
@ -608,7 +608,7 @@ async def _index_with_delta_sync(
{"stage": "delta_sync", "start_token": start_page_token}, {"stage": "delta_sync", "start_token": start_page_token},
) )
changes, final_token, error = await fetch_all_changes( changes, _final_token, error = await fetch_all_changes(
drive_client, start_page_token, folder_id drive_client, start_page_token, folder_id
) )
@ -1011,7 +1011,7 @@ async def _process_single_file(
pending_document.status = DocumentStatus.processing() pending_document.status = DocumentStatus.processing()
await session.commit() await session.commit()
_, error, metadata = await download_and_process_file( _, error, _metadata = await download_and_process_file(
client=drive_client, client=drive_client,
file=file, file=file,
search_space_id=search_space_id, search_space_id=search_space_id,

View file

@ -25,6 +25,7 @@ from app.services.llm_service import get_user_long_context_llm
from app.services.task_logging_service import TaskLoggingService from app.services.task_logging_service import TaskLoggingService
from app.utils.document_converters import ( from app.utils.document_converters import (
create_document_chunks, create_document_chunks,
embed_text,
generate_content_hash, generate_content_hash,
generate_document_summary, generate_document_summary,
generate_unique_identifier_hash, generate_unique_identifier_hash,
@ -413,7 +414,7 @@ async def index_google_gmail_messages(
session, user_id, search_space_id session, user_id, search_space_id
) )
if user_llm: if user_llm and connector.enable_summary:
document_metadata_for_summary = { document_metadata_for_summary = {
"message_id": item["message_id"], "message_id": item["message_id"],
"thread_id": item["thread_id"], "thread_id": item["thread_id"],
@ -432,12 +433,8 @@ async def index_google_gmail_messages(
document_metadata_for_summary, document_metadata_for_summary,
) )
else: else:
summary_content = f"Google Gmail Message: {item['subject']}\n\n" summary_content = f"Google Gmail Message: {item['subject']}\n\nFrom: {item['sender']}\nDate: {item['date_str']}\n\n{item['markdown_content']}"
summary_content += f"Sender: {item['sender']}\n" summary_embedding = embed_text(summary_content)
summary_content += f"Date: {item['date_str']}\n"
summary_embedding = config.embedding_model_instance.embed(
summary_content
)
chunks = await create_document_chunks(item["markdown_content"]) chunks = await create_document_chunks(item["markdown_content"])

View file

@ -14,13 +14,13 @@ from datetime import datetime
from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.ext.asyncio import AsyncSession
from app.config import config
from app.connectors.jira_history import JiraHistoryConnector from app.connectors.jira_history import JiraHistoryConnector
from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnectorType from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnectorType
from app.services.llm_service import get_user_long_context_llm from app.services.llm_service import get_user_long_context_llm
from app.services.task_logging_service import TaskLoggingService from app.services.task_logging_service import TaskLoggingService
from app.utils.document_converters import ( from app.utils.document_converters import (
create_document_chunks, create_document_chunks,
embed_text,
generate_content_hash, generate_content_hash,
generate_document_summary, generate_document_summary,
generate_unique_identifier_hash, generate_unique_identifier_hash,
@ -356,7 +356,7 @@ async def index_jira_issues(
session, user_id, search_space_id session, user_id, search_space_id
) )
if user_llm: if user_llm and connector.enable_summary:
document_metadata = { document_metadata = {
"issue_key": item["issue_identifier"], "issue_key": item["issue_identifier"],
"issue_title": item["issue_title"], "issue_title": item["issue_title"],
@ -373,14 +373,8 @@ async def index_jira_issues(
item["issue_content"], user_llm, document_metadata item["issue_content"], user_llm, document_metadata
) )
else: else:
# Fallback to simple summary if no LLM configured summary_content = f"Jira Issue {item['issue_identifier']}: {item['issue_title']}\n\n{item['issue_content']}"
summary_content = f"Jira Issue {item['issue_identifier']}: {item['issue_title']}\n\nStatus: {item['formatted_issue'].get('status', 'Unknown')}\n\n" summary_embedding = embed_text(summary_content)
if item["formatted_issue"].get("description"):
summary_content += f"Description: {item['formatted_issue'].get('description')}\n\n"
summary_content += f"Comments: {item['comment_count']}"
summary_embedding = config.embedding_model_instance.embed(
summary_content
)
# Process chunks - using the full issue content with comments # Process chunks - using the full issue content with comments
chunks = await create_document_chunks(item["issue_content"]) chunks = await create_document_chunks(item["issue_content"])

View file

@ -13,13 +13,13 @@ from datetime import datetime
from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.ext.asyncio import AsyncSession
from app.config import config
from app.connectors.linear_connector import LinearConnector from app.connectors.linear_connector import LinearConnector
from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnectorType from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnectorType
from app.services.llm_service import get_user_long_context_llm from app.services.llm_service import get_user_long_context_llm
from app.services.task_logging_service import TaskLoggingService from app.services.task_logging_service import TaskLoggingService
from app.utils.document_converters import ( from app.utils.document_converters import (
create_document_chunks, create_document_chunks,
embed_text,
generate_content_hash, generate_content_hash,
generate_document_summary, generate_document_summary,
generate_unique_identifier_hash, generate_unique_identifier_hash,
@ -395,7 +395,7 @@ async def index_linear_issues(
session, user_id, search_space_id session, user_id, search_space_id
) )
if user_llm: if user_llm and connector.enable_summary:
document_metadata_for_summary = { document_metadata_for_summary = {
"issue_id": item["issue_identifier"], "issue_id": item["issue_identifier"],
"issue_title": item["issue_title"], "issue_title": item["issue_title"],
@ -412,17 +412,8 @@ async def index_linear_issues(
item["issue_content"], user_llm, document_metadata_for_summary item["issue_content"], user_llm, document_metadata_for_summary
) )
else: else:
# Fallback to simple summary if no LLM configured summary_content = f"Linear Issue {item['issue_identifier']}: {item['issue_title']}\n\nStatus: {item['state']}\n\n{item['issue_content']}"
description = item["description"] summary_embedding = embed_text(summary_content)
if description and len(description) > 1000:
description = description[:997] + "..."
summary_content = f"Linear Issue {item['issue_identifier']}: {item['issue_title']}\n\nStatus: {item['state']}\n\n"
if description:
summary_content += f"Description: {description}\n\n"
summary_content += f"Comments: {item['comment_count']}"
summary_embedding = config.embedding_model_instance.embed(
summary_content
)
chunks = await create_document_chunks(item["issue_content"]) chunks = await create_document_chunks(item["issue_content"])

View file

@ -13,13 +13,13 @@ from datetime import datetime, timedelta
from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.ext.asyncio import AsyncSession
from app.config import config
from app.connectors.luma_connector import LumaConnector from app.connectors.luma_connector import LumaConnector
from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnectorType from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnectorType
from app.services.llm_service import get_user_long_context_llm from app.services.llm_service import get_user_long_context_llm
from app.services.task_logging_service import TaskLoggingService from app.services.task_logging_service import TaskLoggingService
from app.utils.document_converters import ( from app.utils.document_converters import (
create_document_chunks, create_document_chunks,
embed_text,
generate_content_hash, generate_content_hash,
generate_document_summary, generate_document_summary,
generate_unique_identifier_hash, generate_unique_identifier_hash,
@ -441,7 +441,7 @@ async def index_luma_events(
session, user_id, search_space_id session, user_id, search_space_id
) )
if user_llm: if user_llm and connector.enable_summary:
document_metadata_for_summary = { document_metadata_for_summary = {
"event_id": item["event_id"], "event_id": item["event_id"],
"event_name": item["event_name"], "event_name": item["event_name"],
@ -462,29 +462,10 @@ async def index_luma_events(
item["event_markdown"], user_llm, document_metadata_for_summary item["event_markdown"], user_llm, document_metadata_for_summary
) )
else: else:
# Fallback to simple summary if no LLM configured summary_content = (
summary_content = f"Luma Event: {item['event_name']}\n\n" f"Luma Event: {item['event_name']}\n\n{item['event_markdown']}"
if item["event_url"]:
summary_content += f"URL: {item['event_url']}\n"
summary_content += f"Start: {item['start_at']}\n"
summary_content += f"End: {item['end_at']}\n"
if item["timezone"]:
summary_content += f"Timezone: {item['timezone']}\n"
if item["location"]:
summary_content += f"Location: {item['location']}\n"
if item["city"]:
summary_content += f"City: {item['city']}\n"
if item["host_names"]:
summary_content += f"Hosts: {item['host_names']}\n"
if item["description"]:
desc_preview = item["description"][:1000]
if len(item["description"]) > 1000:
desc_preview += "..."
summary_content += f"Description: {desc_preview}\n"
summary_embedding = config.embedding_model_instance.embed(
summary_content
) )
summary_embedding = embed_text(summary_content)
chunks = await create_document_chunks(item["event_markdown"]) chunks = await create_document_chunks(item["event_markdown"])

View file

@ -13,13 +13,13 @@ from datetime import datetime
from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.ext.asyncio import AsyncSession
from app.config import config
from app.connectors.notion_history import NotionHistoryConnector from app.connectors.notion_history import NotionHistoryConnector
from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnectorType from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnectorType
from app.services.llm_service import get_user_long_context_llm from app.services.llm_service import get_user_long_context_llm
from app.services.task_logging_service import TaskLoggingService from app.services.task_logging_service import TaskLoggingService
from app.utils.document_converters import ( from app.utils.document_converters import (
create_document_chunks, create_document_chunks,
embed_text,
generate_content_hash, generate_content_hash,
generate_document_summary, generate_document_summary,
generate_unique_identifier_hash, generate_unique_identifier_hash,
@ -447,7 +447,7 @@ async def index_notion_pages(
session, user_id, search_space_id session, user_id, search_space_id
) )
if user_llm: if user_llm and connector.enable_summary:
document_metadata_for_summary = { document_metadata_for_summary = {
"page_title": item["page_title"], "page_title": item["page_title"],
"page_id": item["page_id"], "page_id": item["page_id"],
@ -463,11 +463,8 @@ async def index_notion_pages(
document_metadata_for_summary, document_metadata_for_summary,
) )
else: else:
# Fallback to simple summary if no LLM configured summary_content = f"Notion Page: {item['page_title']}\n\n{item['markdown_content']}"
summary_content = f"Notion Page: {item['page_title']}\n\n{item['markdown_content'][:500]}..." summary_embedding = embed_text(summary_content)
summary_embedding = config.embedding_model_instance.embed(
summary_content
)
chunks = await create_document_chunks(item["markdown_content"]) chunks = await create_document_chunks(item["markdown_content"])

View file

@ -26,6 +26,7 @@ from app.services.llm_service import get_user_long_context_llm
from app.services.task_logging_service import TaskLoggingService from app.services.task_logging_service import TaskLoggingService
from app.utils.document_converters import ( from app.utils.document_converters import (
create_document_chunks, create_document_chunks,
embed_text,
generate_content_hash, generate_content_hash,
generate_document_summary, generate_document_summary,
generate_unique_identifier_hash, generate_unique_identifier_hash,
@ -546,7 +547,7 @@ async def index_obsidian_vault(
# Generate summary # Generate summary
summary_content = "" summary_content = ""
if long_context_llm: if long_context_llm and connector.enable_summary:
summary_content, _ = await generate_document_summary( summary_content, _ = await generate_document_summary(
document_string, document_string,
long_context_llm, long_context_llm,
@ -554,7 +555,7 @@ async def index_obsidian_vault(
) )
# Generate embedding # Generate embedding
embedding = config.embedding_model_instance.embed(document_string) embedding = embed_text(document_string)
# Add URL and summary to metadata # Add URL and summary to metadata
document_metadata["url"] = f"obsidian://{vault_name}/{relative_path}" document_metadata["url"] = f"obsidian://{vault_name}/{relative_path}"

View file

@ -17,12 +17,12 @@ from slack_sdk.errors import SlackApiError
from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.ext.asyncio import AsyncSession
from app.config import config
from app.connectors.slack_history import SlackHistory from app.connectors.slack_history import SlackHistory
from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnectorType from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnectorType
from app.services.task_logging_service import TaskLoggingService from app.services.task_logging_service import TaskLoggingService
from app.utils.document_converters import ( from app.utils.document_converters import (
create_document_chunks, create_document_chunks,
embed_text,
generate_content_hash, generate_content_hash,
generate_unique_identifier_hash, generate_unique_identifier_hash,
) )
@ -542,9 +542,7 @@ async def index_slack_messages(
# Heavy processing (embeddings, chunks) # Heavy processing (embeddings, chunks)
chunks = await create_document_chunks(item["combined_document_string"]) chunks = await create_document_chunks(item["combined_document_string"])
doc_embedding = config.embedding_model_instance.embed( doc_embedding = embed_text(item["combined_document_string"])
item["combined_document_string"]
)
# Update document to READY with actual content # Update document to READY with actual content
document.title = f"{item['team_name']}#{item['channel_name']}" document.title = f"{item['team_name']}#{item['channel_name']}"

View file

@ -16,12 +16,12 @@ from datetime import UTC, datetime
from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.ext.asyncio import AsyncSession
from app.config import config
from app.connectors.teams_history import TeamsHistory from app.connectors.teams_history import TeamsHistory
from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnectorType from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnectorType
from app.services.task_logging_service import TaskLoggingService from app.services.task_logging_service import TaskLoggingService
from app.utils.document_converters import ( from app.utils.document_converters import (
create_document_chunks, create_document_chunks,
embed_text,
generate_content_hash, generate_content_hash,
generate_unique_identifier_hash, generate_unique_identifier_hash,
) )
@ -581,9 +581,7 @@ async def index_teams_messages(
# Heavy processing (embeddings, chunks) # Heavy processing (embeddings, chunks)
chunks = await create_document_chunks(item["combined_document_string"]) chunks = await create_document_chunks(item["combined_document_string"])
doc_embedding = config.embedding_model_instance.embed( doc_embedding = embed_text(item["combined_document_string"])
item["combined_document_string"]
)
# Update document to READY with actual content # Update document to READY with actual content
document.title = f"{item['team_name']} - {item['channel_name']}" document.title = f"{item['team_name']} - {item['channel_name']}"

View file

@ -13,13 +13,13 @@ from datetime import datetime
from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.ext.asyncio import AsyncSession
from app.config import config
from app.connectors.webcrawler_connector import WebCrawlerConnector from app.connectors.webcrawler_connector import WebCrawlerConnector
from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnectorType from app.db import Document, DocumentStatus, DocumentType, SearchSourceConnectorType
from app.services.llm_service import get_user_long_context_llm from app.services.llm_service import get_user_long_context_llm
from app.services.task_logging_service import TaskLoggingService from app.services.task_logging_service import TaskLoggingService
from app.utils.document_converters import ( from app.utils.document_converters import (
create_document_chunks, create_document_chunks,
embed_text,
generate_content_hash, generate_content_hash,
generate_document_summary, generate_document_summary,
generate_unique_identifier_hash, generate_unique_identifier_hash,
@ -377,7 +377,7 @@ async def index_crawled_urls(
session, user_id, search_space_id session, user_id, search_space_id
) )
if user_llm: if user_llm and connector.enable_summary:
document_metadata_for_summary = { document_metadata_for_summary = {
"url": url, "url": url,
"title": title, "title": title,
@ -393,24 +393,8 @@ async def index_crawled_urls(
structured_document, user_llm, document_metadata_for_summary structured_document, user_llm, document_metadata_for_summary
) )
else: else:
# Fallback to simple summary if no LLM configured summary_content = f"Crawled URL: {title}\n\nURL: {url}\n\n{content}"
summary_content = f"Crawled URL: {title}\n\n" summary_embedding = embed_text(summary_content)
summary_content += f"URL: {url}\n"
if description:
summary_content += f"Description: {description}\n"
if language:
summary_content += f"Language: {language}\n"
summary_content += f"Crawler: {crawler_type}\n\n"
# Add content preview
content_preview = content[:1000]
if len(content) > 1000:
content_preview += "..."
summary_content += f"Content Preview:\n{content_preview}\n"
summary_embedding = config.embedding_model_instance.embed(
summary_content
)
# Process chunks # Process chunks
chunks = await create_document_chunks(content) chunks = await create_document_chunks(content)

View file

@ -25,6 +25,7 @@ from app.services.task_logging_service import TaskLoggingService
from app.utils.document_converters import ( from app.utils.document_converters import (
convert_document_to_markdown, convert_document_to_markdown,
create_document_chunks, create_document_chunks,
embed_text,
generate_content_hash, generate_content_hash,
generate_document_summary, generate_document_summary,
generate_unique_identifier_hash, generate_unique_identifier_hash,
@ -760,11 +761,7 @@ async def add_received_file_document_using_docling(
f"{metadata_section}\n\n# DOCUMENT SUMMARY\n\n{summary_content}" f"{metadata_section}\n\n# DOCUMENT SUMMARY\n\n{summary_content}"
) )
from app.config import config summary_embedding = embed_text(enhanced_summary_content)
summary_embedding = config.embedding_model_instance.embed(
enhanced_summary_content
)
# Process chunks # Process chunks
chunks = await create_document_chunks(file_in_markdown) chunks = await create_document_chunks(file_in_markdown)
@ -1599,6 +1596,7 @@ async def process_file_in_background_with_document(
log_entry: Log, log_entry: Log,
connector: dict | None = None, connector: dict | None = None,
notification: Notification | None = None, notification: Notification | None = None,
should_summarize: bool = False,
) -> Document | None: ) -> Document | None:
""" """
Process file and update existing pending document (2-phase pattern). Process file and update existing pending document (2-phase pattern).
@ -1881,6 +1879,7 @@ async def process_file_in_background_with_document(
user_id=user_id, user_id=user_id,
session=session, session=session,
llm=user_llm, llm=user_llm,
should_summarize=should_summarize,
) )
await task_logger.log_task_success( await task_logger.log_task_success(

View file

@ -15,6 +15,7 @@ from sqlalchemy.orm import selectinload
from app.config import config from app.config import config
from app.db import SurfsenseDocsChunk, SurfsenseDocsDocument, async_session_maker from app.db import SurfsenseDocsChunk, SurfsenseDocsDocument, async_session_maker
from app.utils.document_converters import embed_text
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -89,7 +90,7 @@ def create_surfsense_docs_chunks(content: str) -> list[SurfsenseDocsChunk]:
return [ return [
SurfsenseDocsChunk( SurfsenseDocsChunk(
content=chunk.text, content=chunk.text,
embedding=config.embedding_model_instance.embed(chunk.text), embedding=embed_text(chunk.text),
) )
for chunk in config.chunker_instance.chunk(content) for chunk in config.chunker_instance.chunk(content)
] ]
@ -154,7 +155,7 @@ async def index_surfsense_docs(session: AsyncSession) -> tuple[int, int, int, in
existing_doc.title = title existing_doc.title = title
existing_doc.content = content existing_doc.content = content
existing_doc.content_hash = content_hash existing_doc.content_hash = content_hash
existing_doc.embedding = config.embedding_model_instance.embed(content) existing_doc.embedding = embed_text(content)
existing_doc.chunks = chunks existing_doc.chunks = chunks
existing_doc.updated_at = datetime.now(UTC) existing_doc.updated_at = datetime.now(UTC)
@ -170,7 +171,7 @@ async def index_surfsense_docs(session: AsyncSession) -> tuple[int, int, int, in
title=title, title=title,
content=content, content=content,
content_hash=content_hash, content_hash=content_hash,
embedding=config.embedding_model_instance.embed(content), embedding=embed_text(content),
chunks=chunks, chunks=chunks,
updated_at=datetime.now(UTC), updated_at=datetime.now(UTC),
) )

View file

@ -1,11 +1,59 @@
import hashlib import hashlib
import logging
import warnings
import numpy as np
from litellm import get_model_info, token_counter from litellm import get_model_info, token_counter
from app.config import config from app.config import config
from app.db import Chunk, DocumentType from app.db import Chunk, DocumentType
from app.prompts import SUMMARY_PROMPT_TEMPLATE from app.prompts import SUMMARY_PROMPT_TEMPLATE
logger = logging.getLogger(__name__)
def _get_embedding_max_tokens() -> int:
    """Return the max token limit for the configured embedding model.

    Looks for a usable limit on the model instance, preferring the public
    ``max_seq_length`` over the private ``_max_tokens``. Falls back to
    8192 (the OpenAI embedding default) when neither attribute is a
    positive integer.
    """
    model = config.embedding_model_instance
    candidates = (
        getattr(model, "max_seq_length", None),
        getattr(model, "_max_tokens", None),
    )
    for limit in candidates:
        if isinstance(limit, int) and limit > 0:
            return limit
    # Neither attribute was usable; assume the OpenAI embedding default.
    return 8192
def truncate_for_embedding(text: str) -> str:
    """Truncate *text* so it fits within the embedding model's context window.

    Uses the embedding model's own tokenizer for accurate token counting,
    so the result is model-agnostic regardless of the underlying provider.

    Args:
        text: The raw text to (potentially) truncate.

    Returns:
        ``text`` unchanged when it already fits, otherwise the decoded
        prefix of at most ``max_tokens`` tokens.
    """
    max_tokens = _get_embedding_max_tokens()

    # Cheap fast path: a token always spans at least one character, so the
    # character count is an upper bound on the token count and we can skip
    # tokenization entirely for short text. (The previous `len(text) // 3`
    # heuristic under-counted tokens for CJK/code-heavy input and could let
    # over-long text through untruncated.)
    if len(text) <= max_tokens:
        return text

    tokenizer = config.embedding_model_instance.get_tokenizer()
    tokens = tokenizer.encode(text)
    if len(tokens) <= max_tokens:
        return text

    warnings.warn(
        f"Truncating text from {len(tokens)} to {max_tokens} tokens for embedding.",
        stacklevel=2,
    )
    return tokenizer.decode(tokens[:max_tokens])
def embed_text(text: str) -> np.ndarray:
    """Embed *text*, truncating it first to fit the model's context window.

    Drop-in replacement for ``config.embedding_model_instance.embed(text)``
    that never exceeds the model's context window.
    """
    safe_text = truncate_for_embedding(text)
    return config.embedding_model_instance.embed(safe_text)
def get_model_context_window(model_name: str) -> int: def get_model_context_window(model_name: str) -> int:
"""Get the total context window size for a model (input + output tokens).""" """Get the total context window size for a model (input + output tokens)."""
@ -146,7 +194,7 @@ async def generate_document_summary(
else: else:
enhanced_summary_content = summary_content enhanced_summary_content = summary_content
summary_embedding = config.embedding_model_instance.embed(enhanced_summary_content) summary_embedding = embed_text(enhanced_summary_content)
return enhanced_summary_content, summary_embedding return enhanced_summary_content, summary_embedding
@ -164,7 +212,7 @@ async def create_document_chunks(content: str) -> list[Chunk]:
return [ return [
Chunk( Chunk(
content=chunk.text, content=chunk.text,
embedding=config.embedding_model_instance.embed(chunk.text), embedding=embed_text(chunk.text),
) )
for chunk in config.chunker_instance.chunk(content) for chunk in config.chunker_instance.chunk(content)
] ]

View file

@ -29,4 +29,7 @@ if __name__ == "__main__":
config = uvicorn.Config(**config_kwargs) config = uvicorn.Config(**config_kwargs)
server = uvicorn.Server(config) server = uvicorn.Server(config)
if sys.platform == "win32":
asyncio.run(server.serve(), loop_factory=asyncio.SelectorEventLoop)
else:
server.run() server.run()

View file

@ -26,7 +26,6 @@ import {
import { import {
clearPlanOwnerRegistry, clearPlanOwnerRegistry,
// extractWriteTodosFromContent, // extractWriteTodosFromContent,
hydratePlanStateAtom,
} from "@/atoms/chat/plan-state.atom"; } from "@/atoms/chat/plan-state.atom";
import { closeReportPanelAtom } from "@/atoms/chat/report-panel.atom"; import { closeReportPanelAtom } from "@/atoms/chat/report-panel.atom";
import { membersAtom } from "@/atoms/members/members-query.atoms"; import { membersAtom } from "@/atoms/members/members-query.atoms";
@ -73,7 +72,6 @@ import {
appendText, appendText,
buildContentForPersistence, buildContentForPersistence,
buildContentForUI, buildContentForUI,
type ContentPart,
type ContentPartsState, type ContentPartsState,
readSSEStream, readSSEStream,
type ThinkingStepData, type ThinkingStepData,
@ -188,7 +186,6 @@ export default function NewChatPage() {
const setMentionedDocumentIds = useSetAtom(mentionedDocumentIdsAtom); const setMentionedDocumentIds = useSetAtom(mentionedDocumentIdsAtom);
const setMentionedDocuments = useSetAtom(mentionedDocumentsAtom); const setMentionedDocuments = useSetAtom(mentionedDocumentsAtom);
const setMessageDocumentsMap = useSetAtom(messageDocumentsMapAtom); const setMessageDocumentsMap = useSetAtom(messageDocumentsMapAtom);
const hydratePlanState = useSetAtom(hydratePlanStateAtom);
const setCurrentThreadState = useSetAtom(currentThreadAtom); const setCurrentThreadState = useSetAtom(currentThreadAtom);
const setTargetCommentId = useSetAtom(setTargetCommentIdAtom); const setTargetCommentId = useSetAtom(setTargetCommentIdAtom);
const clearTargetCommentId = useSetAtom(clearTargetCommentIdAtom); const clearTargetCommentId = useSetAtom(clearTargetCommentIdAtom);
@ -350,7 +347,6 @@ export default function NewChatPage() {
setMessageDocumentsMap, setMessageDocumentsMap,
setMentionedDocumentIds, setMentionedDocumentIds,
setMentionedDocuments, setMentionedDocuments,
hydratePlanState,
closeReportPanel, closeReportPanel,
]); ]);

View file

@ -97,6 +97,7 @@ export const ConnectorIndicator: FC<{ hideTrigger?: boolean }> = ({ hideTrigger
isDisconnecting, isDisconnecting,
periodicEnabled, periodicEnabled,
frequencyMinutes, frequencyMinutes,
enableSummary,
allConnectors, allConnectors,
viewingAccountsType, viewingAccountsType,
viewingMCPList, viewingMCPList,
@ -105,6 +106,7 @@ export const ConnectorIndicator: FC<{ hideTrigger?: boolean }> = ({ hideTrigger
setEndDate, setEndDate,
setPeriodicEnabled, setPeriodicEnabled,
setFrequencyMinutes, setFrequencyMinutes,
setEnableSummary,
handleOpenChange, handleOpenChange,
handleTabChange, handleTabChange,
handleScroll, handleScroll,
@ -282,6 +284,7 @@ export const ConnectorIndicator: FC<{ hideTrigger?: boolean }> = ({ hideTrigger
endDate={endDate} endDate={endDate}
periodicEnabled={periodicEnabled} periodicEnabled={periodicEnabled}
frequencyMinutes={frequencyMinutes} frequencyMinutes={frequencyMinutes}
enableSummary={enableSummary}
isSaving={isSaving} isSaving={isSaving}
isDisconnecting={isDisconnecting} isDisconnecting={isDisconnecting}
isIndexing={indexingConnectorIds.has(editingConnector.id)} isIndexing={indexingConnectorIds.has(editingConnector.id)}
@ -290,6 +293,7 @@ export const ConnectorIndicator: FC<{ hideTrigger?: boolean }> = ({ hideTrigger
onEndDateChange={setEndDate} onEndDateChange={setEndDate}
onPeriodicEnabledChange={setPeriodicEnabled} onPeriodicEnabledChange={setPeriodicEnabled}
onFrequencyChange={setFrequencyMinutes} onFrequencyChange={setFrequencyMinutes}
onEnableSummaryChange={setEnableSummary}
onSave={() => { onSave={() => {
startIndexing(editingConnector.id); startIndexing(editingConnector.id);
handleSaveConnector(() => refreshConnectors()); handleSaveConnector(() => refreshConnectors());
@ -328,11 +332,13 @@ export const ConnectorIndicator: FC<{ hideTrigger?: boolean }> = ({ hideTrigger
endDate={endDate} endDate={endDate}
periodicEnabled={periodicEnabled} periodicEnabled={periodicEnabled}
frequencyMinutes={frequencyMinutes} frequencyMinutes={frequencyMinutes}
enableSummary={enableSummary}
isStartingIndexing={isStartingIndexing} isStartingIndexing={isStartingIndexing}
onStartDateChange={setStartDate} onStartDateChange={setStartDate}
onEndDateChange={setEndDate} onEndDateChange={setEndDate}
onPeriodicEnabledChange={setPeriodicEnabled} onPeriodicEnabledChange={setPeriodicEnabled}
onFrequencyChange={setFrequencyMinutes} onFrequencyChange={setFrequencyMinutes}
onEnableSummaryChange={setEnableSummary}
onConfigChange={setIndexingConnectorConfig} onConfigChange={setIndexingConnectorConfig}
onStartIndexing={() => { onStartIndexing={() => {
if (indexingConfig.connectorId) { if (indexingConfig.connectorId) {

View file

@ -0,0 +1,25 @@
"use client";
import type { FC } from "react";
import { Switch } from "@/components/ui/switch";
interface SummaryConfigProps {
enabled: boolean;
onEnabledChange: (enabled: boolean) => void;
}
export const SummaryConfig: FC<SummaryConfigProps> = ({ enabled, onEnabledChange }) => {
return (
<div className="rounded-xl bg-slate-400/5 dark:bg-white/5 p-3 sm:p-6">
<div className="flex items-center justify-between">
<div className="space-y-1">
<h3 className="font-medium text-sm sm:text-base">Enable AI Summary</h3>
<p className="text-xs sm:text-sm text-muted-foreground">
Improves search quality but adds latency during indexing
</p>
</div>
<Switch checked={enabled} onCheckedChange={onEnabledChange} />
</div>
</div>
);
};

View file

@ -9,6 +9,7 @@ import type { SearchSourceConnector } from "@/contracts/types/connector.types";
import { cn } from "@/lib/utils"; import { cn } from "@/lib/utils";
import { DateRangeSelector } from "../../components/date-range-selector"; import { DateRangeSelector } from "../../components/date-range-selector";
import { PeriodicSyncConfig } from "../../components/periodic-sync-config"; import { PeriodicSyncConfig } from "../../components/periodic-sync-config";
import { SummaryConfig } from "../../components/summary-config";
import { getConnectorDisplayName } from "../../tabs/all-connectors-tab"; import { getConnectorDisplayName } from "../../tabs/all-connectors-tab";
import { getConnectorConfigComponent } from "../index"; import { getConnectorConfigComponent } from "../index";
@ -18,6 +19,7 @@ interface ConnectorEditViewProps {
endDate: Date | undefined; endDate: Date | undefined;
periodicEnabled: boolean; periodicEnabled: boolean;
frequencyMinutes: string; frequencyMinutes: string;
enableSummary: boolean;
isSaving: boolean; isSaving: boolean;
isDisconnecting: boolean; isDisconnecting: boolean;
isIndexing?: boolean; isIndexing?: boolean;
@ -26,6 +28,7 @@ interface ConnectorEditViewProps {
onEndDateChange: (date: Date | undefined) => void; onEndDateChange: (date: Date | undefined) => void;
onPeriodicEnabledChange: (enabled: boolean) => void; onPeriodicEnabledChange: (enabled: boolean) => void;
onFrequencyChange: (frequency: string) => void; onFrequencyChange: (frequency: string) => void;
onEnableSummaryChange: (enabled: boolean) => void;
onSave: () => void; onSave: () => void;
onDisconnect: () => void; onDisconnect: () => void;
onBack: () => void; onBack: () => void;
@ -40,6 +43,7 @@ export const ConnectorEditView: FC<ConnectorEditViewProps> = ({
endDate, endDate,
periodicEnabled, periodicEnabled,
frequencyMinutes, frequencyMinutes,
enableSummary,
isSaving, isSaving,
isDisconnecting, isDisconnecting,
isIndexing = false, isIndexing = false,
@ -48,6 +52,7 @@ export const ConnectorEditView: FC<ConnectorEditViewProps> = ({
onEndDateChange, onEndDateChange,
onPeriodicEnabledChange, onPeriodicEnabledChange,
onFrequencyChange, onFrequencyChange,
onEnableSummaryChange,
onSave, onSave,
onDisconnect, onDisconnect,
onBack, onBack,
@ -209,9 +214,12 @@ export const ConnectorEditView: FC<ConnectorEditViewProps> = ({
/> />
)} )}
{/* Date range selector and periodic sync - only shown for indexable connectors */} {/* Summary and sync settings - only shown for indexable connectors */}
{connector.is_indexable && ( {connector.is_indexable && (
<> <>
{/* AI Summary toggle */}
<SummaryConfig enabled={enableSummary} onEnabledChange={onEnableSummaryChange} />
{/* Date range selector - not shown for Google Drive (regular and Composio), Webcrawler, or GitHub (indexes full repo snapshots) */} {/* Date range selector - not shown for Google Drive (regular and Composio), Webcrawler, or GitHub (indexes full repo snapshots) */}
{connector.connector_type !== "GOOGLE_DRIVE_CONNECTOR" && {connector.connector_type !== "GOOGLE_DRIVE_CONNECTOR" &&
connector.connector_type !== "COMPOSIO_GOOGLE_DRIVE_CONNECTOR" && connector.connector_type !== "COMPOSIO_GOOGLE_DRIVE_CONNECTOR" &&

View file

@ -10,6 +10,7 @@ import { getConnectorTypeDisplay } from "@/lib/connectors/utils";
import { cn } from "@/lib/utils"; import { cn } from "@/lib/utils";
import { DateRangeSelector } from "../../components/date-range-selector"; import { DateRangeSelector } from "../../components/date-range-selector";
import { PeriodicSyncConfig } from "../../components/periodic-sync-config"; import { PeriodicSyncConfig } from "../../components/periodic-sync-config";
import { SummaryConfig } from "../../components/summary-config";
import type { IndexingConfigState } from "../../constants/connector-constants"; import type { IndexingConfigState } from "../../constants/connector-constants";
import { getConnectorDisplayName } from "../../tabs/all-connectors-tab"; import { getConnectorDisplayName } from "../../tabs/all-connectors-tab";
import { getConnectorConfigComponent } from "../index"; import { getConnectorConfigComponent } from "../index";
@ -21,11 +22,13 @@ interface IndexingConfigurationViewProps {
endDate: Date | undefined; endDate: Date | undefined;
periodicEnabled: boolean; periodicEnabled: boolean;
frequencyMinutes: string; frequencyMinutes: string;
enableSummary: boolean;
isStartingIndexing: boolean; isStartingIndexing: boolean;
onStartDateChange: (date: Date | undefined) => void; onStartDateChange: (date: Date | undefined) => void;
onEndDateChange: (date: Date | undefined) => void; onEndDateChange: (date: Date | undefined) => void;
onPeriodicEnabledChange: (enabled: boolean) => void; onPeriodicEnabledChange: (enabled: boolean) => void;
onFrequencyChange: (frequency: string) => void; onFrequencyChange: (frequency: string) => void;
onEnableSummaryChange: (enabled: boolean) => void;
onConfigChange?: (config: Record<string, unknown>) => void; onConfigChange?: (config: Record<string, unknown>) => void;
onStartIndexing: () => void; onStartIndexing: () => void;
onSkip: () => void; onSkip: () => void;
@ -38,11 +41,13 @@ export const IndexingConfigurationView: FC<IndexingConfigurationViewProps> = ({
endDate, endDate,
periodicEnabled, periodicEnabled,
frequencyMinutes, frequencyMinutes,
enableSummary,
isStartingIndexing, isStartingIndexing,
onStartDateChange, onStartDateChange,
onEndDateChange, onEndDateChange,
onPeriodicEnabledChange, onPeriodicEnabledChange,
onFrequencyChange, onFrequencyChange,
onEnableSummaryChange,
onConfigChange, onConfigChange,
onStartIndexing, onStartIndexing,
onSkip, onSkip,
@ -149,9 +154,12 @@ export const IndexingConfigurationView: FC<IndexingConfigurationViewProps> = ({
<ConnectorConfigComponent connector={connector} onConfigChange={onConfigChange} /> <ConnectorConfigComponent connector={connector} onConfigChange={onConfigChange} />
)} )}
{/* Date range selector and periodic sync - only shown for indexable connectors */} {/* Summary and sync settings - only shown for indexable connectors */}
{connector?.is_indexable && ( {connector?.is_indexable && (
<> <>
{/* AI Summary toggle */}
<SummaryConfig enabled={enableSummary} onEnabledChange={onEnableSummaryChange} />
{/* Date range selector - not shown for Google Drive (regular and Composio), Webcrawler, or GitHub (indexes full repo snapshots) */} {/* Date range selector - not shown for Google Drive (regular and Composio), Webcrawler, or GitHub (indexes full repo snapshots) */}
{config.connectorType !== "GOOGLE_DRIVE_CONNECTOR" && {config.connectorType !== "GOOGLE_DRIVE_CONNECTOR" &&
config.connectorType !== "COMPOSIO_GOOGLE_DRIVE_CONNECTOR" && config.connectorType !== "COMPOSIO_GOOGLE_DRIVE_CONNECTOR" &&

View file

@ -67,6 +67,7 @@ export const useConnectorDialog = () => {
const [isStartingIndexing, setIsStartingIndexing] = useState(false); const [isStartingIndexing, setIsStartingIndexing] = useState(false);
const [periodicEnabled, setPeriodicEnabled] = useState(false); const [periodicEnabled, setPeriodicEnabled] = useState(false);
const [frequencyMinutes, setFrequencyMinutes] = useState("1440"); const [frequencyMinutes, setFrequencyMinutes] = useState("1440");
const [enableSummary, setEnableSummary] = useState(false);
// Edit mode state // Edit mode state
const [editingConnector, setEditingConnector] = useState<SearchSourceConnector | null>(null); const [editingConnector, setEditingConnector] = useState<SearchSourceConnector | null>(null);
@ -240,6 +241,7 @@ export const useConnectorDialog = () => {
!connector.is_indexable ? false : connector.periodic_indexing_enabled !connector.is_indexable ? false : connector.periodic_indexing_enabled
); );
setFrequencyMinutes(connector.indexing_frequency_minutes?.toString() || "1440"); setFrequencyMinutes(connector.indexing_frequency_minutes?.toString() || "1440");
setEnableSummary(connector.enable_summary ?? false);
// Reset dates - user can set new ones for re-indexing // Reset dates - user can set new ones for re-indexing
setStartDate(undefined); setStartDate(undefined);
setEndDate(undefined); setEndDate(undefined);
@ -257,6 +259,7 @@ export const useConnectorDialog = () => {
setEndDate(undefined); setEndDate(undefined);
setPeriodicEnabled(false); setPeriodicEnabled(false);
setFrequencyMinutes("1440"); setFrequencyMinutes("1440");
setEnableSummary(false);
setIsScrolled(false); setIsScrolled(false);
setSearchQuery(""); setSearchQuery("");
} }
@ -269,6 +272,7 @@ export const useConnectorDialog = () => {
setEndDate(undefined); setEndDate(undefined);
setPeriodicEnabled(false); setPeriodicEnabled(false);
setFrequencyMinutes("1440"); setFrequencyMinutes("1440");
setEnableSummary(false);
setIsScrolled(false); setIsScrolled(false);
setSearchQuery(""); setSearchQuery("");
} }
@ -722,6 +726,7 @@ export const useConnectorDialog = () => {
setConnectorConfig(connector.config || {}); setConnectorConfig(connector.config || {});
setPeriodicEnabled(false); setPeriodicEnabled(false);
setFrequencyMinutes("1440"); setFrequencyMinutes("1440");
setEnableSummary(connector.enable_summary ?? false);
setStartDate(undefined); setStartDate(undefined);
setEndDate(undefined); setEndDate(undefined);
@ -909,12 +914,13 @@ export const useConnectorDialog = () => {
const startDateStr = startDate ? format(startDate, "yyyy-MM-dd") : undefined; const startDateStr = startDate ? format(startDate, "yyyy-MM-dd") : undefined;
const endDateStr = endDate ? format(endDate, "yyyy-MM-dd") : undefined; const endDateStr = endDate ? format(endDate, "yyyy-MM-dd") : undefined;
// Update connector with periodic sync settings and config changes // Update connector with summary, periodic sync settings, and config changes
if (periodicEnabled || indexingConnectorConfig) { if (enableSummary || periodicEnabled || indexingConnectorConfig) {
const frequency = periodicEnabled ? parseInt(frequencyMinutes, 10) : undefined; const frequency = periodicEnabled ? parseInt(frequencyMinutes, 10) : undefined;
await updateConnector({ await updateConnector({
id: indexingConfig.connectorId, id: indexingConfig.connectorId,
data: { data: {
enable_summary: enableSummary,
...(periodicEnabled && { ...(periodicEnabled && {
periodic_indexing_enabled: true, periodic_indexing_enabled: true,
indexing_frequency_minutes: frequency, indexing_frequency_minutes: frequency,
@ -1042,6 +1048,7 @@ export const useConnectorDialog = () => {
updateConnector, updateConnector,
periodicEnabled, periodicEnabled,
frequencyMinutes, frequencyMinutes,
enableSummary,
router, router,
indexingConnectorConfig, indexingConnectorConfig,
] ]
@ -1108,6 +1115,7 @@ export const useConnectorDialog = () => {
// Load existing periodic sync settings (disabled for non-indexable connectors) // Load existing periodic sync settings (disabled for non-indexable connectors)
setPeriodicEnabled(!connector.is_indexable ? false : connector.periodic_indexing_enabled); setPeriodicEnabled(!connector.is_indexable ? false : connector.periodic_indexing_enabled);
setFrequencyMinutes(connector.indexing_frequency_minutes?.toString() || "1440"); setFrequencyMinutes(connector.indexing_frequency_minutes?.toString() || "1440");
setEnableSummary(connector.enable_summary ?? false);
// Reset dates - user can set new ones for re-indexing // Reset dates - user can set new ones for re-indexing
setStartDate(undefined); setStartDate(undefined);
setEndDate(undefined); setEndDate(undefined);
@ -1189,6 +1197,7 @@ export const useConnectorDialog = () => {
id: editingConnector.id, id: editingConnector.id,
data: { data: {
name: connectorName || editingConnector.name, name: connectorName || editingConnector.name,
enable_summary: enableSummary,
periodic_indexing_enabled: !editingConnector.is_indexable ? false : periodicEnabled, periodic_indexing_enabled: !editingConnector.is_indexable ? false : periodicEnabled,
indexing_frequency_minutes: !editingConnector.is_indexable ? null : frequency, indexing_frequency_minutes: !editingConnector.is_indexable ? null : frequency,
config: connectorConfig || editingConnector.config, config: connectorConfig || editingConnector.config,
@ -1326,6 +1335,7 @@ export const useConnectorDialog = () => {
updateConnector, updateConnector,
periodicEnabled, periodicEnabled,
frequencyMinutes, frequencyMinutes,
enableSummary,
getFrequencyLabel, getFrequencyLabel,
router, router,
connectorConfig, connectorConfig,
@ -1518,6 +1528,7 @@ export const useConnectorDialog = () => {
setEndDate(undefined); setEndDate(undefined);
setPeriodicEnabled(false); setPeriodicEnabled(false);
setFrequencyMinutes("1440"); setFrequencyMinutes("1440");
setEnableSummary(false);
} }
} }
}, },
@ -1557,6 +1568,7 @@ export const useConnectorDialog = () => {
isDisconnecting, isDisconnecting,
periodicEnabled, periodicEnabled,
frequencyMinutes, frequencyMinutes,
enableSummary,
searchSpaceId, searchSpaceId,
allConnectors, allConnectors,
viewingAccountsType, viewingAccountsType,
@ -1568,6 +1580,7 @@ export const useConnectorDialog = () => {
setEndDate, setEndDate,
setPeriodicEnabled, setPeriodicEnabled,
setFrequencyMinutes, setFrequencyMinutes,
setEnableSummary,
setConnectorName, setConnectorName,
// Handlers // Handlers

View file

@ -8,6 +8,7 @@ import { useCallback, useMemo, useRef, useState } from "react";
import { useDropzone } from "react-dropzone"; import { useDropzone } from "react-dropzone";
import { toast } from "sonner"; import { toast } from "sonner";
import { uploadDocumentMutationAtom } from "@/atoms/documents/document-mutation.atoms"; import { uploadDocumentMutationAtom } from "@/atoms/documents/document-mutation.atoms";
import { SummaryConfig } from "@/components/assistant-ui/connector-popup/components/summary-config";
import { import {
Accordion, Accordion,
AccordionContent, AccordionContent,
@ -124,6 +125,7 @@ export function DocumentUploadTab({
const [files, setFiles] = useState<File[]>([]); const [files, setFiles] = useState<File[]>([]);
const [uploadProgress, setUploadProgress] = useState(0); const [uploadProgress, setUploadProgress] = useState(0);
const [accordionValue, setAccordionValue] = useState<string>(""); const [accordionValue, setAccordionValue] = useState<string>("");
const [shouldSummarize, setShouldSummarize] = useState(false);
const [uploadDocumentMutation] = useAtom(uploadDocumentMutationAtom); const [uploadDocumentMutation] = useAtom(uploadDocumentMutationAtom);
const { mutate: uploadDocuments, isPending: isUploading } = uploadDocumentMutation; const { mutate: uploadDocuments, isPending: isUploading } = uploadDocumentMutation;
const fileInputRef = useRef<HTMLInputElement>(null); const fileInputRef = useRef<HTMLInputElement>(null);
@ -216,7 +218,7 @@ export function DocumentUploadTab({
}, 200); }, 200);
uploadDocuments( uploadDocuments(
{ files, search_space_id: Number(searchSpaceId) }, { files, search_space_id: Number(searchSpaceId), should_summarize: shouldSummarize },
{ {
onSuccess: () => { onSuccess: () => {
clearInterval(progressInterval); clearInterval(progressInterval);
@ -413,6 +415,10 @@ export function DocumentUploadTab({
</motion.div> </motion.div>
)} )}
<div className="mt-3 sm:mt-6">
<SummaryConfig enabled={shouldSummarize} onEnabledChange={setShouldSummarize} />
</div>
<motion.div <motion.div
className="mt-3 sm:mt-6" className="mt-3 sm:mt-6"
initial={{ opacity: 0, y: 10 }} initial={{ opacity: 0, y: 10 }}

View file

@ -41,6 +41,7 @@ export const searchSourceConnector = z.object({
is_active: z.boolean().default(true), is_active: z.boolean().default(true),
last_indexed_at: z.string().nullable(), last_indexed_at: z.string().nullable(),
config: z.record(z.string(), z.any()), config: z.record(z.string(), z.any()),
enable_summary: z.boolean().default(false),
periodic_indexing_enabled: z.boolean(), periodic_indexing_enabled: z.boolean(),
indexing_frequency_minutes: z.number().nullable(), indexing_frequency_minutes: z.number().nullable(),
next_scheduled_at: z.string().nullable(), next_scheduled_at: z.string().nullable(),
@ -94,6 +95,7 @@ export const createConnectorRequest = z.object({
is_active: true, is_active: true,
last_indexed_at: true, last_indexed_at: true,
config: true, config: true,
enable_summary: true,
periodic_indexing_enabled: true, periodic_indexing_enabled: true,
indexing_frequency_minutes: true, indexing_frequency_minutes: true,
next_scheduled_at: true, next_scheduled_at: true,
@ -118,6 +120,7 @@ export const updateConnectorRequest = z.object({
is_active: true, is_active: true,
last_indexed_at: true, last_indexed_at: true,
config: true, config: true,
enable_summary: true,
periodic_indexing_enabled: true, periodic_indexing_enabled: true,
indexing_frequency_minutes: true, indexing_frequency_minutes: true,
next_scheduled_at: true, next_scheduled_at: true,

View file

@ -135,6 +135,7 @@ export const createDocumentResponse = z.object({
export const uploadDocumentRequest = z.object({ export const uploadDocumentRequest = z.object({
files: z.array(z.instanceof(File)), files: z.array(z.instanceof(File)),
search_space_id: z.number(), search_space_id: z.number(),
should_summarize: z.boolean().default(false),
}); });
export const uploadDocumentResponse = z.object({ export const uploadDocumentResponse = z.object({

View file

@ -127,6 +127,7 @@ class DocumentsApiService {
formData.append("files", file); formData.append("files", file);
}); });
formData.append("search_space_id", String(parsedRequest.data.search_space_id)); formData.append("search_space_id", String(parsedRequest.data.search_space_id));
formData.append("should_summarize", String(parsedRequest.data.should_summarize));
return baseApiService.postFormData(`/api/v1/documents/fileupload`, uploadDocumentResponse, { return baseApiService.postFormData(`/api/v1/documents/fileupload`, uploadDocumentResponse, {
body: formData, body: formData,

View file

@ -70,7 +70,8 @@ const pendingSyncs = new Map<string, Promise<SyncHandle>>();
// v5: fixed duplicate key errors, stable cutoff dates, onMustRefetch handler, // v5: fixed duplicate key errors, stable cutoff dates, onMustRefetch handler,
// real-time documents table with title/created_by_id/status columns, // real-time documents table with title/created_by_id/status columns,
// consolidated single documents sync, pending state for document queue visibility // consolidated single documents sync, pending state for document queue visibility
const SYNC_VERSION = 5; // v6: added enable_summary column to search_source_connectors
const SYNC_VERSION = 6;
// Database name prefix for identifying SurfSense databases // Database name prefix for identifying SurfSense databases
const DB_PREFIX = "surfsense-"; const DB_PREFIX = "surfsense-";
@ -226,6 +227,7 @@ export async function initElectric(userId: string): Promise<ElectricClient> {
periodic_indexing_enabled BOOLEAN NOT NULL DEFAULT FALSE, periodic_indexing_enabled BOOLEAN NOT NULL DEFAULT FALSE,
indexing_frequency_minutes INTEGER, indexing_frequency_minutes INTEGER,
next_scheduled_at TIMESTAMPTZ, next_scheduled_at TIMESTAMPTZ,
enable_summary BOOLEAN NOT NULL DEFAULT FALSE,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW() created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
); );