Merge remote-tracking branch 'upstream/dev' into feat/composio

This commit is contained in:
Anish Sarkar 2026-01-23 14:35:17 +05:30
commit fae52345f8
65 changed files with 3291 additions and 153 deletions

View file

@ -0,0 +1,29 @@
"""No-op migration for Composio support
Revision ID: 74
Revises: 73
Create Date: 2026-01-21
NOTE: This migration is a no-op since Composio is not supported yet.
"""
from collections.abc import Sequence
# revision identifiers, used by Alembic.
revision: str = "74"
down_revision: str | None = "73"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
def upgrade() -> None:
    """Apply revision 74, which intentionally changes nothing in the schema."""
    return None
def downgrade() -> None:
    """Revert revision 74; there is nothing to undo because upgrade is a no-op.

    Note: PostgreSQL does not support removing enum values directly.
    """
    return None

View file

@ -0,0 +1,75 @@
"""Add chat_session_state table for live collaboration
Revision ID: 75
Revises: 74
Creates chat_session_state table to track AI responding state per thread.
Enables real-time sync via Electric SQL for shared chat collaboration.
"""
from collections.abc import Sequence
from alembic import op
revision: str = "75"
down_revision: str | None = "74"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
def upgrade() -> None:
    """Create ``chat_session_state`` and add it to the Electric SQL publication.

    The statements run in this order: create the table, create the
    ``thread_id`` index, switch the table to ``REPLICA IDENTITY FULL``, and
    finally add the table to ``electric_publication_default`` unless it is
    already a member.
    """
    create_table_sql = """
CREATE TABLE IF NOT EXISTS chat_session_state (
id SERIAL PRIMARY KEY,
thread_id INTEGER NOT NULL REFERENCES new_chat_threads(id) ON DELETE CASCADE,
ai_responding_to_user_id UUID REFERENCES "user"(id) ON DELETE SET NULL,
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
UNIQUE (thread_id)
)
"""
    create_index_sql = "CREATE INDEX IF NOT EXISTS idx_chat_session_state_thread_id ON chat_session_state(thread_id)"
    replica_identity_sql = "ALTER TABLE chat_session_state REPLICA IDENTITY FULL;"
    # The publication membership check keeps this step safe to repeat.
    add_to_publication_sql = """
DO $$
BEGIN
IF NOT EXISTS (
SELECT 1 FROM pg_publication_tables
WHERE pubname = 'electric_publication_default'
AND tablename = 'chat_session_state'
) THEN
ALTER PUBLICATION electric_publication_default ADD TABLE chat_session_state;
END IF;
END
$$;
"""
    for statement in (
        create_table_sql,
        create_index_sql,
        replica_identity_sql,
        add_to_publication_sql,
    ):
        op.execute(statement)
def downgrade() -> None:
    """Remove ``chat_session_state`` from the Electric SQL publication, then drop it."""
    # Only drop the table from the publication when it is currently a member.
    remove_from_publication_sql = """
DO $$
BEGIN
IF EXISTS (
SELECT 1 FROM pg_publication_tables
WHERE pubname = 'electric_publication_default'
AND tablename = 'chat_session_state'
) THEN
ALTER PUBLICATION electric_publication_default DROP TABLE chat_session_state;
END IF;
END
$$;
"""
    drop_table_sql = "DROP TABLE IF EXISTS chat_session_state;"
    for statement in (remove_from_publication_sql, drop_table_sql):
        op.execute(statement)

View file

@ -0,0 +1,99 @@
"""Add live collaboration tables to Electric SQL publication
Revision ID: 76
Revises: 75
Enables real-time sync for live collaboration features:
- new_chat_messages: Live message sync between users
- chat_comments: Live comment updates
Note: User/member info is fetched via API (membersAtom) for client-side joins,
not via Electric SQL, to keep where clauses optimized and reduce complexity.
"""
from collections.abc import Sequence
from alembic import op
revision: str = "76"
down_revision: str | None = "75"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
def upgrade() -> None:
    """Register the live-collaboration tables for Electric SQL replication.

    For ``new_chat_messages`` and ``chat_comments`` this first sets
    ``REPLICA IDENTITY FULL`` and then adds each table to
    ``electric_publication_default`` when it is not already a member.
    """
    collaboration_tables = ("new_chat_messages", "chat_comments")

    # Set REPLICA IDENTITY FULL for Electric SQL sync.
    for table in collaboration_tables:
        op.execute(f"ALTER TABLE {table} REPLICA IDENTITY FULL;")

    # Add each table to the Electric publication (skipped when already present).
    for table in collaboration_tables:
        op.execute(
            f"""
DO $$
BEGIN
IF NOT EXISTS (
SELECT 1 FROM pg_publication_tables
WHERE pubname = 'electric_publication_default'
AND tablename = '{table}'
) THEN
ALTER PUBLICATION electric_publication_default ADD TABLE {table};
END IF;
END
$$;
"""
        )
def downgrade() -> None:
    """Take the live-collaboration tables out of the Electric SQL publication.

    Each table is dropped from ``electric_publication_default`` only when it
    is currently a member.
    """
    for table in ("new_chat_messages", "chat_comments"):
        op.execute(
            f"""
DO $$
BEGIN
IF EXISTS (
SELECT 1 FROM pg_publication_tables
WHERE pubname = 'electric_publication_default'
AND tablename = '{table}'
) THEN
ALTER PUBLICATION electric_publication_default DROP TABLE {table};
END IF;
END
$$;
"""
        )
    # Note: Not reverting REPLICA IDENTITY as it doesn't harm normal operations

View file

@ -0,0 +1,70 @@
"""Add thread_id to chat_comments for denormalized Electric subscriptions
This denormalization allows a single Electric SQL subscription per thread
instead of one per message, significantly reducing connection overhead.
Revision ID: 77
Revises: 76
"""
from collections.abc import Sequence
from alembic import op
revision: str = "77"
down_revision: str | None = "76"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
def upgrade() -> None:
    """Add ``thread_id`` to ``chat_comments``, backfill it, then constrain and index it.

    Steps, in order: add the nullable column, copy ``thread_id`` from the
    related ``new_chat_messages`` row, mark the column NOT NULL, add the
    foreign key to ``new_chat_threads`` and create the lookup index.

    Every step is safe to repeat. The foreign key is added through a guarded
    ``DO`` block because PostgreSQL has no ``ADD CONSTRAINT IF NOT EXISTS``.
    """
    # Add the column (nullable initially so existing rows can be backfilled).
    op.execute(
        """
ALTER TABLE chat_comments
ADD COLUMN IF NOT EXISTS thread_id INTEGER;
"""
    )
    # Backfill thread_id from the related message; only touches unset rows.
    op.execute(
        """
UPDATE chat_comments c
SET thread_id = m.thread_id
FROM new_chat_messages m
WHERE c.message_id = m.id
AND c.thread_id IS NULL;
"""
    )
    # Make it NOT NULL after backfill.
    op.execute(
        """
ALTER TABLE chat_comments
ALTER COLUMN thread_id SET NOT NULL;
"""
    )
    # Add the FK constraint only when it is missing, so a repeated run does
    # not fail with "constraint already exists".
    op.execute(
        """
DO $$
BEGIN
IF NOT EXISTS (
SELECT 1 FROM pg_constraint
WHERE conname = 'fk_chat_comments_thread_id'
AND conrelid = 'chat_comments'::regclass
) THEN
ALTER TABLE chat_comments
ADD CONSTRAINT fk_chat_comments_thread_id
FOREIGN KEY (thread_id) REFERENCES new_chat_threads(id) ON DELETE CASCADE;
END IF;
END
$$;
"""
    )
    # Add index for efficient Electric subscriptions by thread.
    op.execute(
        "CREATE INDEX IF NOT EXISTS idx_chat_comments_thread_id ON chat_comments(thread_id)"
    )
def downgrade() -> None:
    """Drop the ``thread_id`` index, foreign key and column from ``chat_comments``."""
    # Order matters: index and constraint go before the column they depend on.
    rollback_statements = (
        "DROP INDEX IF EXISTS idx_chat_comments_thread_id",
        "ALTER TABLE chat_comments DROP CONSTRAINT IF EXISTS fk_chat_comments_thread_id",
        "ALTER TABLE chat_comments DROP COLUMN IF EXISTS thread_id",
    )
    for statement in rollback_statements:
        op.execute(statement)

View file

@ -0,0 +1,33 @@
"""Add Obsidian connector enums
Revision ID: 78
Revises: 77
Create Date: 2026-01-21
"""
from collections.abc import Sequence
from alembic import op
# revision identifiers, used by Alembic.
revision: str = "78"
down_revision: str | None = "77"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
def upgrade() -> None:
    """Add the ``OBSIDIAN_CONNECTOR`` value to both connector-related enum types."""
    # documenttype first, then searchsourceconnectortype.
    for enum_type in ("documenttype", "searchsourceconnectortype"):
        op.execute(
            f"ALTER TYPE {enum_type} ADD VALUE IF NOT EXISTS 'OBSIDIAN_CONNECTOR'"
        )
def downgrade() -> None:
    """Leave the schema untouched.

    PostgreSQL doesn't support removing enum values directly, so the values
    added by upgrade remain in the enum types but won't be used.
    """
    return None

View file

@ -1,8 +1,7 @@
"""Add Composio connector types to SearchSourceConnectorType and DocumentType enums
Revision ID: 74
Revises: 73
Create Date: 2026-01-21
Revision ID: 79
Revises: 78
This migration adds the Composio connector enum values to both:
- searchsourceconnectortype (for connector type tracking)
@ -23,8 +22,8 @@ from collections.abc import Sequence
from alembic import op
# revision identifiers, used by Alembic.
revision: str = "74"
down_revision: str | None = "73"
revision: str = "79"
down_revision: str | None = "78"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None

View file

@ -34,6 +34,12 @@ You have access to the following tools:
- Returns: Documentation content with chunk IDs for citations (prefixed with 'doc-', e.g., [citation:doc-123])
1. search_knowledge_base: Search the user's personal knowledge base for relevant information.
- IMPORTANT: When searching for information (meetings, schedules, notes, tasks, etc.), ALWAYS search broadly
across ALL sources first by omitting connectors_to_search. The user may store information in various places
including calendar apps, note-taking apps (Obsidian, Notion), chat apps (Slack, Discord), and more.
- Only narrow to specific connectors if the user explicitly asks (e.g., "check my Slack" or "in my calendar").
- Personal notes in Obsidian, Notion, or NOTE often contain schedules, meeting times, reminders, and other
important information that may not be in calendars.
- Args:
- query: The search query - be specific and include key terms
- top_k: Number of results to retrieve (default: 10)
@ -157,6 +163,13 @@ You have access to the following tools:
stating "Based on your memory..." - integrate the context seamlessly.
</tools>
<tool_call_examples>
- User: "What time is the team meeting today?"
- Call: `search_knowledge_base(query="team meeting time today")` (searches ALL sources - calendar, notes, Obsidian, etc.)
- DO NOT limit to just calendar - the info might be in notes!
- User: "When is my gym session?"
- Call: `search_knowledge_base(query="gym session time schedule")` (searches ALL sources)
- User: "How do I install SurfSense?"
- Call: `search_surfsense_docs(query="installation setup")`
@ -175,6 +188,12 @@ You have access to the following tools:
- User: "What did I discuss on Slack last week about the React migration?"
- Call: `search_knowledge_base(query="React migration", connectors_to_search=["SLACK_CONNECTOR"], start_date="YYYY-MM-DD", end_date="YYYY-MM-DD")`
- User: "Check my Obsidian notes for meeting notes"
- Call: `search_knowledge_base(query="meeting notes", connectors_to_search=["OBSIDIAN_CONNECTOR"])`
- User: "What's in my Obsidian vault about project ideas?"
- Call: `search_knowledge_base(query="project ideas", connectors_to_search=["OBSIDIAN_CONNECTOR"])`
- User: "Remember that I prefer TypeScript over JavaScript"
- Call: `save_memory(content="User prefers TypeScript over JavaScript for development", category="preference")`

View file

@ -49,6 +49,7 @@ _ALL_CONNECTORS: list[str] = [
"BOOKSTACK_CONNECTOR",
"CRAWLED_URL",
"CIRCLEBACK",
"OBSIDIAN_CONNECTOR",
]
@ -508,6 +509,16 @@ async def search_knowledge_base_async(
)
all_documents.extend(chunks)
elif connector == "OBSIDIAN_CONNECTOR":
_, chunks = await connector_service.search_obsidian(
user_query=query,
search_space_id=search_space_id,
top_k=top_k,
start_date=resolved_start_date,
end_date=resolved_end_date,
)
all_documents.extend(chunks)
except Exception as e:
print(f"Error searching connector {connector}: {e}")
continue
@ -596,6 +607,7 @@ def create_search_knowledge_base_tool(
- WEBCRAWLER_CONNECTOR: "Webpages indexed by SurfSense" (personally selected websites)
- BOOKSTACK_CONNECTOR: "BookStack pages" (personal documentation)
- CIRCLEBACK: "Circleback meeting notes, transcripts, and action items" (personal meeting records)
- OBSIDIAN_CONNECTOR: "Obsidian vault notes and markdown files" (personal notes and knowledge management)
NOTE: `WEBCRAWLER_CONNECTOR` is mapped internally to the canonical document type `CRAWLED_URL`.

View file

@ -61,6 +61,21 @@ class Config:
"FFmpeg is not installed on the system. Please install it to use the Surfsense Podcaster."
)
# Deployment Mode (self-hosted or cloud)
# self-hosted: Full access to local file system connectors (Obsidian, etc.)
# cloud: Only cloud-based connectors available
DEPLOYMENT_MODE = os.getenv("SURFSENSE_DEPLOYMENT_MODE", "self-hosted")
@classmethod
def is_self_hosted(cls) -> bool:
    """Return True when ``DEPLOYMENT_MODE`` is ``self-hosted``.

    The comparison ignores surrounding whitespace and letter case, so an
    environment value such as ``"Self-Hosted"`` is still recognised instead
    of silently matching neither mode.
    """
    return (cls.DEPLOYMENT_MODE or "").strip().lower() == "self-hosted"
@classmethod
def is_cloud(cls) -> bool:
    """Return True when ``DEPLOYMENT_MODE`` is ``cloud``.

    The comparison ignores surrounding whitespace and letter case, so an
    environment value such as ``"Cloud"`` is still recognised instead of
    silently matching neither mode.
    """
    return (cls.DEPLOYMENT_MODE or "").strip().lower() == "cloud"
# Database
DATABASE_URL = os.getenv("DATABASE_URL")

View file

@ -53,6 +53,7 @@ class DocumentType(str, Enum):
ELASTICSEARCH_CONNECTOR = "ELASTICSEARCH_CONNECTOR"
BOOKSTACK_CONNECTOR = "BOOKSTACK_CONNECTOR"
CIRCLEBACK = "CIRCLEBACK"
OBSIDIAN_CONNECTOR = "OBSIDIAN_CONNECTOR"
NOTE = "NOTE"
COMPOSIO_GOOGLE_DRIVE_CONNECTOR = "COMPOSIO_GOOGLE_DRIVE_CONNECTOR"
COMPOSIO_GMAIL_CONNECTOR = "COMPOSIO_GMAIL_CONNECTOR"
@ -83,6 +84,9 @@ class SearchSourceConnectorType(str, Enum):
WEBCRAWLER_CONNECTOR = "WEBCRAWLER_CONNECTOR"
BOOKSTACK_CONNECTOR = "BOOKSTACK_CONNECTOR"
CIRCLEBACK_CONNECTOR = "CIRCLEBACK_CONNECTOR"
OBSIDIAN_CONNECTOR = (
"OBSIDIAN_CONNECTOR" # Self-hosted only - Local Obsidian vault indexing
)
MCP_CONNECTOR = "MCP_CONNECTOR" # Model Context Protocol - User-defined API tools
COMPOSIO_GOOGLE_DRIVE_CONNECTOR = "COMPOSIO_GOOGLE_DRIVE_CONNECTOR"
COMPOSIO_GMAIL_CONNECTOR = "COMPOSIO_GMAIL_CONNECTOR"
@ -419,6 +423,13 @@ class ChatComment(BaseModel, TimestampMixin):
nullable=False,
index=True,
)
# Denormalized thread_id for efficient Electric SQL subscriptions (one per thread)
thread_id = Column(
Integer,
ForeignKey("new_chat_threads.id", ondelete="CASCADE"),
nullable=False,
index=True,
)
parent_id = Column(
Integer,
ForeignKey("chat_comments.id", ondelete="CASCADE"),
@ -442,6 +453,7 @@ class ChatComment(BaseModel, TimestampMixin):
# Relationships
message = relationship("NewChatMessage", back_populates="comments")
thread = relationship("NewChatThread")
author = relationship("User")
parent = relationship(
"ChatComment", remote_side="ChatComment.id", backref="replies"
@ -478,6 +490,38 @@ class ChatCommentMention(BaseModel, TimestampMixin):
mentioned_user = relationship("User")
class ChatSessionState(BaseModel):
    """
    Real-time session state for shared chat collaboration.

    Holds one row per thread (``thread_id`` is unique); the row is synced to
    clients via Electric SQL.
    """

    __tablename__ = "chat_session_state"

    # Owning thread; the row is removed when the thread is deleted.
    thread_id = Column(
        Integer,
        ForeignKey("new_chat_threads.id", ondelete="CASCADE"),
        nullable=False,
        unique=True,
        index=True,
    )
    # User the AI is currently answering; NULL when idle or the user is deleted.
    ai_responding_to_user_id = Column(
        UUID(as_uuid=True),
        ForeignKey("user.id", ondelete="SET NULL"),
        nullable=True,
        index=True,
    )
    # Timezone-aware timestamp set on insert and refreshed on every ORM update.
    updated_at = Column(
        TIMESTAMP(timezone=True),
        nullable=False,
        default=lambda: datetime.now(UTC),
        onupdate=lambda: datetime.now(UTC),
    )

    thread = relationship("NewChatThread")
    ai_responding_to_user = relationship("User")
class MemoryCategory(str, Enum):
"""Categories for user memories."""

View file

@ -990,7 +990,7 @@ async def handle_new_chat(
search_space_id=request.search_space_id,
chat_id=request.chat_id,
session=session,
user_id=str(user.id), # Pass user ID for memory tools
user_id=str(user.id), # Pass user ID for memory tools and session state
llm_config_id=llm_config_id,
attachments=request.attachments,
mentioned_document_ids=request.mentioned_document_ids,

View file

@ -901,6 +901,25 @@ async def index_connector_content(
)
response_message = "Web page indexing started in the background."
elif connector.connector_type == SearchSourceConnectorType.OBSIDIAN_CONNECTOR:
from app.config import config as app_config
from app.tasks.celery_tasks.connector_tasks import index_obsidian_vault_task
# Obsidian connector only available in self-hosted mode
if not app_config.is_self_hosted():
raise HTTPException(
status_code=400,
detail="Obsidian connector is only available in self-hosted mode",
)
logger.info(
f"Triggering Obsidian vault indexing for connector {connector_id} into search space {search_space_id} from {indexing_from} to {indexing_to}"
)
index_obsidian_vault_task.delay(
connector_id, search_space_id, str(user.id), indexing_from, indexing_to
)
response_message = "Obsidian vault indexing started in the background."
elif (
connector.connector_type
== SearchSourceConnectorType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR
@ -2195,6 +2214,58 @@ async def run_bookstack_indexing(
)
# Add new helper functions for Obsidian indexing
async def run_obsidian_indexing_with_new_session(
    connector_id: int,
    search_space_id: int,
    user_id: str,
    start_date: str,
    end_date: str,
):
    """Open a dedicated database session and run Obsidian indexing inside it.

    Logs when the background task starts and when it finishes.
    """
    logger.info(
        f"Background task started: Indexing Obsidian connector {connector_id} into space {search_space_id} from {start_date} to {end_date}"
    )
    async with async_session_maker() as db_session:
        await run_obsidian_indexing(
            session=db_session,
            connector_id=connector_id,
            search_space_id=search_space_id,
            user_id=user_id,
            start_date=start_date,
            end_date=end_date,
        )
    logger.info(f"Background task finished: Indexing Obsidian connector {connector_id}")
async def run_obsidian_indexing(
    session: AsyncSession,
    connector_id: int,
    search_space_id: int,
    user_id: str,
    start_date: str,
    end_date: str,
):
    """
    Run Obsidian vault indexing through the shared notification-aware runner.

    Args:
        session: Database session
        connector_id: ID of the Obsidian connector
        search_space_id: ID of the search space
        user_id: ID of the user
        start_date: Start date for indexing
        end_date: End date for indexing
    """
    # Imported here rather than at module level, as in the original code.
    from app.tasks.connector_indexers import index_obsidian_vault

    runner_kwargs = {
        "session": session,
        "connector_id": connector_id,
        "search_space_id": search_space_id,
        "user_id": user_id,
        "start_date": start_date,
        "end_date": end_date,
        "indexing_function": index_obsidian_vault,
        "update_timestamp_func": _update_connector_timestamp_by_id,
    }
    await _run_indexing_with_notifications(**runner_kwargs)
async def run_composio_indexing_with_new_session(
connector_id: int,
search_space_id: int,

View file

@ -0,0 +1,29 @@
"""
Pydantic schemas for chat session state (live collaboration).
"""
from datetime import datetime
from uuid import UUID
from pydantic import BaseModel, ConfigDict
class RespondingUser(BaseModel):
    """Identity of the user the AI is currently responding to."""

    # from_attributes lets the schema be populated from attribute-bearing objects.
    model_config = ConfigDict(from_attributes=True)

    id: UUID
    display_name: str | None = None
    email: str
class ChatSessionStateResponse(BaseModel):
    """Snapshot of the session state of a single chat thread."""

    # from_attributes lets the schema be populated from attribute-bearing objects.
    model_config = ConfigDict(from_attributes=True)

    id: int
    thread_id: int
    # None when the AI is not responding to anyone.
    responding_to: RespondingUser | None = None
    updated_at: datetime

View file

@ -0,0 +1,59 @@
"""
Obsidian Connector Credentials Schema.
Obsidian is a local-first note-taking app that stores notes as markdown files.
This connector supports indexing from local file system (self-hosted only).
"""
from pydantic import BaseModel, Field, field_validator
class ObsidianAuthCredentialsBase(BaseModel):
    """
    Credentials/configuration for the Obsidian connector.

    Since Obsidian vaults are local directories, this schema primarily
    holds the vault path and configuration options rather than API tokens.
    """

    vault_path: str
    vault_name: str | None = None
    # validate_default=True makes the "before" validator below run even when
    # the field is omitted, so the default exclusion list is applied instead
    # of leaving the value as None.
    exclude_folders: list[str] | None = Field(default=None, validate_default=True)
    include_attachments: bool = False

    @field_validator("vault_path")
    @classmethod
    def validate_vault_path(cls, v: str) -> str:
        """Reject empty or whitespace-only paths and return the stripped path."""
        if not v or not v.strip():
            raise ValueError("Vault path is required")
        return v.strip()

    @field_validator("exclude_folders", mode="before")
    @classmethod
    def parse_exclude_folders(cls, v):
        """Normalise ``exclude_folders`` before type validation.

        ``None`` becomes the default exclusion list, a comma-separated string
        is split into trimmed non-empty names, and any other value is passed
        through unchanged.
        """
        if v is None:
            return [".trash", ".obsidian", "templates"]
        if isinstance(v, str):
            return [f.strip() for f in v.split(",") if f.strip()]
        return v

    def to_dict(self) -> dict:
        """Convert credentials to dictionary for storage."""
        return {
            "vault_path": self.vault_path,
            "vault_name": self.vault_name,
            "exclude_folders": self.exclude_folders,
            "include_attachments": self.include_attachments,
        }

    @classmethod
    def from_dict(cls, data: dict) -> "ObsidianAuthCredentialsBase":
        """Create credentials from a dictionary.

        A missing ``vault_path`` is passed as an empty string and is therefore
        rejected by ``validate_vault_path``.
        """
        return cls(
            vault_path=data.get("vault_path", ""),
            vault_name=data.get("vault_name"),
            exclude_folders=data.get("exclude_folders"),
            include_attachments=data.get("include_attachments", False),
        )

View file

@ -281,8 +281,10 @@ async def create_comment(
detail="You don't have permission to create comments in this search space",
)
thread = message.thread
comment = ChatComment(
message_id=message_id,
thread_id=thread.id, # Denormalized for efficient Electric subscriptions
author_id=user.id,
content=content,
)
@ -299,7 +301,6 @@ async def create_comment(
user_names = await get_user_names_for_mentions(session, set(mentions_map.keys()))
# Create notifications for mentioned users (excluding author)
thread = message.thread
author_name = user.display_name or user.email
content_preview = render_mentions(content, user_names)
for mentioned_user_id, mention_id in mentions_map.items():
@ -393,8 +394,10 @@ async def create_reply(
detail="You don't have permission to create comments in this search space",
)
thread = parent_comment.message.thread
reply = ChatComment(
message_id=parent_comment.message_id,
thread_id=thread.id, # Denormalized for efficient Electric subscriptions
parent_id=comment_id,
author_id=user.id,
content=content,
@ -412,7 +415,6 @@ async def create_reply(
user_names = await get_user_names_for_mentions(session, set(mentions_map.keys()))
# Create notifications for mentioned users (excluding author)
thread = parent_comment.message.thread
author_name = user.display_name or user.email
content_preview = render_mentions(content, user_names)
for mentioned_user_id, mention_id in mentions_map.items():

View file

@ -0,0 +1,65 @@
"""
Service layer for chat session state (live collaboration).
"""
from datetime import UTC, datetime
from uuid import UUID
from sqlalchemy import select
from sqlalchemy.dialects.postgresql import insert
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import selectinload
from app.db import ChatSessionState
async def get_session_state(
    session: AsyncSession,
    thread_id: int,
) -> ChatSessionState | None:
    """Fetch the session state row for ``thread_id``, or None when there is none.

    The ``ai_responding_to_user`` relationship is eagerly loaded with the row.
    """
    statement = (
        select(ChatSessionState)
        .options(selectinload(ChatSessionState.ai_responding_to_user))
        .where(ChatSessionState.thread_id == thread_id)
    )
    rows = await session.execute(statement)
    return rows.scalar_one_or_none()
async def set_ai_responding(
    session: AsyncSession,
    thread_id: int,
    user_id: UUID,
) -> ChatSessionState:
    """Record that the AI is responding to ``user_id`` in ``thread_id``.

    Issues a single PostgreSQL ``INSERT ... ON CONFLICT (thread_id) DO UPDATE``
    so the row is created or overwritten in one statement, commits, and then
    returns the freshly read state.
    """
    timestamp = datetime.now(UTC)
    responding_fields = {
        "ai_responding_to_user_id": user_id,
        "updated_at": timestamp,
    }
    statement = (
        insert(ChatSessionState)
        .values(thread_id=thread_id, **responding_fields)
        .on_conflict_do_update(
            index_elements=["thread_id"],
            set_=responding_fields,
        )
    )
    await session.execute(statement)
    await session.commit()
    return await get_session_state(session, thread_id)
async def clear_ai_responding(
    session: AsyncSession,
    thread_id: int,
) -> ChatSessionState | None:
    """Reset the "AI responding" marker for ``thread_id`` once a response ends.

    Returns the updated state, or the falsy lookup result unchanged when no
    state row exists for the thread.
    """
    current_state = await get_session_state(session, thread_id)
    if not current_state:
        # Nothing stored for this thread, so there is nothing to clear.
        return current_state

    current_state.ai_responding_to_user_id = None
    current_state.updated_at = datetime.now(UTC)
    await session.commit()
    await session.refresh(current_state)
    return current_state

View file

@ -2780,3 +2780,94 @@ class ConnectorService:
}
return result_object, circleback_docs
async def search_obsidian(
self,
user_query: str,
search_space_id: int,
top_k: int = 20,
start_date: datetime | None = None,
end_date: datetime | None = None,
) -> tuple:
"""
Search for Obsidian vault notes and return both the source information and langchain documents.
Uses combined chunk-level and document-level hybrid search with RRF fusion.
Args:
user_query: The user's query
search_space_id: The search space ID to search in
top_k: Maximum number of results to return
start_date: Optional start date for filtering documents by updated_at
end_date: Optional end date for filtering documents by updated_at
Returns:
tuple: (sources_info, langchain_documents)
"""
obsidian_docs = await self._combined_rrf_search(
query_text=user_query,
search_space_id=search_space_id,
document_type="OBSIDIAN_CONNECTOR",
top_k=top_k,
start_date=start_date,
end_date=end_date,
)
# Early return if no results
if not obsidian_docs:
return {
"id": 53,
"name": "Obsidian Vault",
"type": "OBSIDIAN_CONNECTOR",
"sources": [],
}, []
def _title_fn(doc_info: dict[str, Any], metadata: dict[str, Any]) -> str:
return doc_info.get("title", "Untitled Note")
def _url_fn(doc_info: dict[str, Any], metadata: dict[str, Any]) -> str:
# Obsidian URL format: obsidian://vault_name/path
return doc_info.get("url", "")
def _description_fn(
chunk: dict[str, Any], _doc_info: dict[str, Any], metadata: dict[str, Any]
) -> str:
description = self._chunk_preview(chunk.get("content", ""), limit=200)
info_parts = []
vault_name = metadata.get("vault_name")
tags = metadata.get("tags", [])
if vault_name:
info_parts.append(f"Vault: {vault_name}")
if tags and isinstance(tags, list) and len(tags) > 0:
info_parts.append(f"Tags: {', '.join(tags[:3])}")
if info_parts:
description = (description + " | " + " | ".join(info_parts)).strip(" |")
return description
def _extra_fields_fn(
_chunk: dict[str, Any], _doc_info: dict[str, Any], metadata: dict[str, Any]
) -> dict[str, Any]:
return {
"vault_name": metadata.get("vault_name", ""),
"file_path": metadata.get("file_path", ""),
"tags": metadata.get("tags", []),
"outgoing_links": metadata.get("outgoing_links", []),
}
sources_list = self._build_chunk_sources_from_documents(
obsidian_docs,
title_fn=_title_fn,
url_fn=_url_fn,
description_fn=_description_fn,
extra_fields_fn=_extra_fields_fn,
)
# Create result object
result_object = {
"id": 53,
"name": "Obsidian Vault",
"type": "OBSIDIAN_CONNECTOR",
"sources": sources_list,
}
return result_object, obsidian_docs

View file

@ -623,6 +623,28 @@ class MentionNotificationHandler(BaseNotificationHandler):
def __init__(self):
super().__init__("new_mention")
async def find_notification_by_mention(
    self,
    session: AsyncSession,
    mention_id: int,
) -> Notification | None:
    """
    Find an existing notification by mention ID.

    The lookup matches this handler's notification type and the
    ``mention_id`` stored in the notification metadata. If more than one
    matching row exists, the one with the lowest ID is returned instead of
    raising, so a duplicate left by concurrent inserts cannot break lookups.

    Args:
        session: Database session
        mention_id: The mention ID to search for

    Returns:
        Notification if found, None otherwise
    """
    query = (
        select(Notification)
        .where(
            Notification.type == self.notification_type,
            Notification.notification_metadata["mention_id"].astext
            == str(mention_id),
        )
        # Deterministic pick plus LIMIT 1: scalar_one_or_none() would raise
        # MultipleResultsFound when duplicates exist.
        .order_by(Notification.id)
        .limit(1)
    )
    result = await session.execute(query)
    return result.scalars().first()
async def notify_new_mention(
self,
session: AsyncSession,
@ -641,11 +663,12 @@ class MentionNotificationHandler(BaseNotificationHandler):
) -> Notification:
"""
Create notification when a user is @mentioned in a comment.
Uses mention_id for idempotency to prevent duplicate notifications.
Args:
session: Database session
mentioned_user_id: User who was mentioned
mention_id: ID of the mention record
mention_id: ID of the mention record (used for idempotency)
comment_id: ID of the comment containing the mention
message_id: ID of the message being commented on
thread_id: ID of the chat thread
@ -658,8 +681,16 @@ class MentionNotificationHandler(BaseNotificationHandler):
search_space_id: Search space ID
Returns:
Notification: The created notification
Notification: The created or existing notification
"""
# Check if notification already exists for this mention (idempotency)
existing = await self.find_notification_by_mention(session, mention_id)
if existing:
logger.info(
f"Notification already exists for mention {mention_id}, returning existing"
)
return existing
title = f"{author_name} mentioned you"
message = content_preview[:100] + ("..." if len(content_preview) > 100 else "")
@ -676,21 +707,37 @@ class MentionNotificationHandler(BaseNotificationHandler):
"content_preview": content_preview[:200],
}
notification = Notification(
user_id=mentioned_user_id,
search_space_id=search_space_id,
type=self.notification_type,
title=title,
message=message,
notification_metadata=metadata,
)
session.add(notification)
await session.commit()
await session.refresh(notification)
logger.info(
f"Created new_mention notification {notification.id} for user {mentioned_user_id}"
)
return notification
try:
notification = Notification(
user_id=mentioned_user_id,
search_space_id=search_space_id,
type=self.notification_type,
title=title,
message=message,
notification_metadata=metadata,
)
session.add(notification)
await session.commit()
await session.refresh(notification)
logger.info(
f"Created new_mention notification {notification.id} for user {mentioned_user_id}"
)
return notification
except Exception as e:
# Handle race condition - if duplicate key error, try to fetch existing
await session.rollback()
if (
"duplicate key" in str(e).lower()
or "unique constraint" in str(e).lower()
):
logger.warning(
f"Duplicate notification detected for mention {mention_id}, fetching existing"
)
existing = await self.find_notification_by_mention(session, mention_id)
if existing:
return existing
# Re-raise if not a duplicate key error or couldn't find existing
raise
class NotificationService:

View file

@ -761,6 +761,49 @@ async def _index_bookstack_pages(
)
@celery_app.task(name="index_obsidian_vault", bind=True)
def index_obsidian_vault_task(
    self,
    connector_id: int,
    search_space_id: int,
    user_id: str,
    start_date: str,
    end_date: str,
):
    """Celery entry point that indexes an Obsidian vault.

    Runs the async indexing coroutine to completion on a freshly created
    event loop, which is always closed afterwards.
    """
    import asyncio

    event_loop = asyncio.new_event_loop()
    asyncio.set_event_loop(event_loop)
    try:
        indexing_run = _index_obsidian_vault(
            connector_id, search_space_id, user_id, start_date, end_date
        )
        event_loop.run_until_complete(indexing_run)
    finally:
        event_loop.close()
async def _index_obsidian_vault(
    connector_id: int,
    search_space_id: int,
    user_id: str,
    start_date: str,
    end_date: str,
):
    """Run Obsidian vault indexing inside a session from the Celery session maker."""
    # Imported here rather than at module level, as in the original code.
    from app.routes.search_source_connectors_routes import (
        run_obsidian_indexing,
    )

    session_factory = get_celery_session_maker()
    async with session_factory() as db_session:
        await run_obsidian_indexing(
            db_session, connector_id, search_space_id, user_id, start_date, end_date
        )
@celery_app.task(name="index_composio_connector", bind=True)
def index_composio_connector_task(
self,

View file

@ -11,6 +11,7 @@ Supports loading LLM configurations from:
import json
from collections.abc import AsyncGenerator
from uuid import UUID
from langchain_core.messages import HumanMessage
from sqlalchemy.ext.asyncio import AsyncSession
@ -27,6 +28,10 @@ from app.agents.new_chat.llm_config import (
)
from app.db import Document, SurfsenseDocsDocument
from app.schemas.new_chat import ChatAttachment
from app.services.chat_session_state_service import (
clear_ai_responding,
set_ai_responding,
)
from app.services.connector_service import ConnectorService
from app.services.new_streaming_service import VercelStreamingService
@ -167,9 +172,8 @@ async def stream_new_chat(
search_space_id: The search space ID
chat_id: The chat ID (used as LangGraph thread_id for memory)
session: The database session
user_id: The current user's UUID string (for memory tools)
user_id: The current user's UUID string (for memory tools and session state)
llm_config_id: The LLM configuration ID (default: -1 for first global config)
messages: Optional chat history from frontend (list of ChatMessage)
attachments: Optional attachments with extracted content
mentioned_document_ids: Optional list of document IDs mentioned with @ in the chat
mentioned_surfsense_doc_ids: Optional list of SurfSense doc IDs mentioned with @ in the chat
@ -183,6 +187,9 @@ async def stream_new_chat(
current_text_id: str | None = None
try:
# Mark AI as responding to this user for live collaboration
if user_id:
await set_ai_responding(session, chat_id, UUID(user_id))
# Load LLM config - supports both YAML (negative IDs) and database (positive IDs)
agent_config: AgentConfig | None = None
@ -1147,3 +1154,7 @@ async def stream_new_chat(
yield streaming_service.format_finish_step()
yield streaming_service.format_finish()
yield streaming_service.format_done()
finally:
# Clear AI responding state for live collaboration
await clear_ai_responding(session, chat_id)

View file

@ -46,6 +46,7 @@ from .luma_indexer import index_luma_events
# Documentation and knowledge management
from .notion_indexer import index_notion_pages
from .obsidian_indexer import index_obsidian_vault
from .slack_indexer import index_slack_messages
from .webcrawler_indexer import index_crawled_urls
@ -68,6 +69,7 @@ __all__ = [ # noqa: RUF022
"index_linear_issues",
# Documentation and knowledge management
"index_notion_pages",
"index_obsidian_vault",
"index_crawled_urls",
# Communication platforms
"index_slack_messages",

View file

@ -0,0 +1,516 @@
"""
Obsidian connector indexer.
Indexes markdown notes from a local Obsidian vault.
This connector is only available in self-hosted mode.
"""
import os
import re
from datetime import UTC, datetime
from pathlib import Path
import yaml
from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.ext.asyncio import AsyncSession
from app.config import config
from app.db import Document, DocumentType, SearchSourceConnectorType
from app.services.llm_service import get_user_long_context_llm
from app.services.task_logging_service import TaskLoggingService
from app.utils.document_converters import (
create_document_chunks,
generate_content_hash,
generate_document_summary,
generate_unique_identifier_hash,
)
from .base import (
build_document_metadata_string,
check_document_by_unique_identifier,
get_connector_by_id,
get_current_timestamp,
logger,
update_connector_last_indexed,
)
def parse_frontmatter(content: str) -> tuple[dict | None, str]:
"""
Parse YAML frontmatter from markdown content.
Args:
content: The full markdown content
Returns:
Tuple of (frontmatter dict or None, content without frontmatter)
"""
if not content.startswith("---"):
return None, content
# Find the closing ---
end_match = re.search(r"\n---\n", content[3:])
if not end_match:
return None, content
frontmatter_str = content[3 : end_match.start() + 3]
remaining_content = content[end_match.end() + 3 :]
try:
frontmatter = yaml.safe_load(frontmatter_str)
return frontmatter, remaining_content.strip()
except yaml.YAMLError:
return None, content
def extract_wiki_links(content: str) -> list[str]:
    """
    Extract [[wiki-style links]] from content.

    Both ``[[link]]`` and ``[[link|alias]]`` forms are recognised; only the
    link target (the part before ``|``) is returned.

    Args:
        content: Markdown content

    Returns:
        List of distinct link targets in order of first appearance
    """
    # Match [[link]] or [[link|alias]]
    pattern = r"\[\[([^\]|]+)(?:\|[^\]]+)?\]\]"
    matches = re.findall(pattern, content)
    # dict.fromkeys de-duplicates while keeping a stable, reproducible order
    # (a set would reorder the links differently from run to run).
    return list(dict.fromkeys(matches))
def extract_tags(content: str) -> list[str]:
    """
    Extract inline #tags from content.

    A tag is a ``#`` that is not preceded by a non-whitespace character and is
    followed by a letter, then any run of letters, digits, ``_``, ``/`` or
    ``-``. Markdown headers (``## Title``) are therefore not matched. This is
    a plain text scan: YAML frontmatter is not parsed here, so tags declared
    as a frontmatter list must be merged in by the caller.

    Args:
        content: Markdown content

    Returns:
        List of distinct tags (without # prefix) in order of first appearance
    """
    # Match #tag but not ## headers
    pattern = r"(?<!\S)#([a-zA-Z][a-zA-Z0-9_/-]*)"
    matches = re.findall(pattern, content)
    # dict.fromkeys de-duplicates while keeping a stable, reproducible order
    # (a set would reorder the tags differently from run to run).
    return list(dict.fromkeys(matches))
def scan_vault(
vault_path: str,
exclude_folders: list[str] | None = None,
) -> list[dict]:
"""
Scan an Obsidian vault for markdown files.
Args:
vault_path: Path to the Obsidian vault
exclude_folders: List of folder names to exclude
Returns:
List of file info dicts with path, name, modified time
"""
if exclude_folders is None:
exclude_folders = [".trash", ".obsidian", "templates"]
vault = Path(vault_path)
if not vault.exists():
raise ValueError(f"Vault path does not exist: {vault_path}")
files = []
for md_file in vault.rglob("*.md"):
# Check if file is in an excluded folder
relative_path = md_file.relative_to(vault)
parts = relative_path.parts
if any(excluded in parts for excluded in exclude_folders):
continue
try:
stat = md_file.stat()
files.append(
{
"path": str(md_file),
"relative_path": str(relative_path),
"name": md_file.stem,
"modified_at": datetime.fromtimestamp(stat.st_mtime, tz=UTC),
"created_at": datetime.fromtimestamp(stat.st_ctime, tz=UTC),
"size": stat.st_size,
}
)
except OSError as e:
logger.warning(f"Could not stat file {md_file}: {e}")
return files
def _parse_date_bounds(
    start_date: str | None,
    end_date: str | None,
) -> tuple[datetime | None, datetime | None]:
    """Turn optional YYYY-MM-DD strings into inclusive, UTC-aware bounds."""
    start_dt = None
    end_dt = None
    # The frontend may send the literal string "undefined" for an unset date.
    if start_date and start_date != "undefined":
        start_dt = datetime.strptime(start_date, "%Y-%m-%d").replace(tzinfo=UTC)
    if end_date and end_date != "undefined":
        # Make end_date inclusive: extend it to the last microsecond of the day
        # so files modified within the final second are not dropped.
        end_dt = datetime.strptime(end_date, "%Y-%m-%d").replace(
            hour=23, minute=59, second=59, microsecond=999999, tzinfo=UTC
        )
    return start_dt, end_dt


def _json_safe(value):
    """Recursively convert parsed YAML values into JSON-serializable ones."""
    if value is None or isinstance(value, (str, int, float, bool)):
        return value
    if isinstance(value, dict):
        return {str(key): _json_safe(item) for key, item in value.items()}
    if isinstance(value, (list, tuple, set)):
        return [_json_safe(item) for item in value]
    if hasattr(value, "isoformat"):
        # YAML turns unquoted dates/timestamps into date/datetime objects.
        return value.isoformat()
    return str(value)


def _resolve_title_and_tags(
    frontmatter: dict | None,
    fallback_title: str,
    inline_tags: list[str],
) -> tuple[str, list[str]]:
    """Pick the note title and merge inline tags with frontmatter tags."""
    title = fallback_title
    tags = list(inline_tags)

    if isinstance(frontmatter, dict):
        # A frontmatter "title:" may be empty (None) or a non-string scalar
        # such as a number; only use it when it yields non-blank text.
        fm_title = frontmatter.get("title")
        if fm_title is not None and str(fm_title).strip():
            title = str(fm_title).strip()

        fm_tags = frontmatter.get("tags", [])
        if isinstance(fm_tags, str):
            fm_tags = [fm_tags]
        if isinstance(fm_tags, list):
            # Coerce to str so that numeric tags cannot break ', '.join later.
            tags.extend(
                str(tag) for tag in fm_tags if tag is not None and str(tag).strip()
            )

    # De-duplicate while keeping a stable order.
    return title, list(dict.fromkeys(tags))


async def index_obsidian_vault(
    session: AsyncSession,
    connector_id: int,
    search_space_id: int,
    user_id: str,
    start_date: str | None = None,
    end_date: str | None = None,
    update_last_indexed: bool = True,
) -> tuple[int, str | None]:
    """
    Index notes from a local Obsidian vault.

    This indexer is only available in self-hosted mode as it requires
    direct file system access to the user's Obsidian vault.

    Each markdown file is read, its frontmatter, tags and wiki links are
    extracted, and a Document is created or, when the content hash differs
    from the stored one, updated. Files that are empty, undecodable,
    unchanged or that raise while being processed are counted as skipped.
    All changes are committed once, after the whole vault has been processed.

    Args:
        session: Database session
        connector_id: ID of the Obsidian connector
        search_space_id: ID of the search space to store documents in
        user_id: ID of the user
        start_date: Start date for filtering (YYYY-MM-DD format) - optional
        end_date: End date for filtering (YYYY-MM-DD format) - optional
        update_last_indexed: Whether to update the last_indexed_at timestamp

    Returns:
        Tuple containing (number of documents indexed, error message or None)
    """
    task_logger = TaskLoggingService(session, search_space_id)

    # Check if self-hosted mode
    if not config.is_self_hosted():
        return 0, "Obsidian connector is only available in self-hosted mode"

    # Log task start
    log_entry = await task_logger.log_task_start(
        task_name="obsidian_vault_indexing",
        source="connector_indexing_task",
        message=f"Starting Obsidian vault indexing for connector {connector_id}",
        metadata={
            "connector_id": connector_id,
            "user_id": str(user_id),
            "start_date": start_date,
            "end_date": end_date,
        },
    )

    try:
        # Get the connector
        await task_logger.log_task_progress(
            log_entry,
            f"Retrieving Obsidian connector {connector_id} from database",
            {"stage": "connector_retrieval"},
        )

        connector = await get_connector_by_id(
            session, connector_id, SearchSourceConnectorType.OBSIDIAN_CONNECTOR
        )

        if not connector:
            await task_logger.log_task_failure(
                log_entry,
                f"Connector with ID {connector_id} not found or is not an Obsidian connector",
                "Connector not found",
                {"error_type": "ConnectorNotFound"},
            )
            return (
                0,
                f"Connector with ID {connector_id} not found or is not an Obsidian connector",
            )

        # Get vault path from connector config
        vault_path = connector.config.get("vault_path")
        if not vault_path:
            await task_logger.log_task_failure(
                log_entry,
                "Vault path not configured for this connector",
                "Missing vault path",
                {"error_type": "MissingVaultPath"},
            )
            return 0, "Vault path not configured for this connector"

        # Validate vault path exists
        if not os.path.exists(vault_path):
            await task_logger.log_task_failure(
                log_entry,
                f"Vault path does not exist: {vault_path}",
                "Vault path not found",
                {"error_type": "VaultNotFound", "vault_path": vault_path},
            )
            return 0, f"Vault path does not exist: {vault_path}"

        # Get configuration options
        exclude_folders = connector.config.get(
            "exclude_folders", [".trash", ".obsidian", "templates"]
        )
        # normpath drops a trailing separator; without it basename("/vault/")
        # is "" and every identifier/URL would carry an empty vault name.
        vault_name = connector.config.get("vault_name") or os.path.basename(
            os.path.normpath(vault_path)
        )

        await task_logger.log_task_progress(
            log_entry,
            f"Scanning Obsidian vault: {vault_name}",
            {"stage": "vault_scan", "vault_path": vault_path},
        )

        # Scan vault for markdown files
        try:
            files = scan_vault(vault_path, exclude_folders)
        except Exception as e:
            await task_logger.log_task_failure(
                log_entry,
                f"Failed to scan vault: {e}",
                "Vault scan error",
                {"error_type": "VaultScanError"},
            )
            return 0, f"Failed to scan vault: {e}"

        logger.info(f"Found {len(files)} markdown files in vault")

        await task_logger.log_task_progress(
            log_entry,
            f"Found {len(files)} markdown files to process",
            {"stage": "files_discovered", "file_count": len(files)},
        )

        # Filter by modification date if bounds were provided
        start_dt, end_dt = _parse_date_bounds(start_date, end_date)

        # Only apply date filtering if dates are in the correct order
        if start_dt and end_dt and start_dt > end_dt:
            logger.warning(
                f"start_date ({start_date}) is after end_date ({end_date}), skipping date filter"
            )
        else:
            if start_dt:
                files = [f for f in files if f["modified_at"] >= start_dt]
                logger.info(
                    f"After start_date filter ({start_date}): {len(files)} files"
                )
            if end_dt:
                files = [f for f in files if f["modified_at"] <= end_dt]
                logger.info(f"After end_date filter ({end_date}): {len(files)} files")

        logger.info(f"Processing {len(files)} files after date filtering")

        # Get LLM for summarization
        long_context_llm = await get_user_long_context_llm(
            session, user_id, search_space_id
        )

        indexed_count = 0
        skipped_count = 0

        for file_info in files:
            try:
                file_path = file_info["path"]
                relative_path = file_info["relative_path"]

                # Read file content
                try:
                    with open(file_path, encoding="utf-8") as f:
                        content = f.read()
                except UnicodeDecodeError:
                    logger.warning(f"Could not decode file {file_path}, skipping")
                    skipped_count += 1
                    continue

                if not content.strip():
                    logger.debug(f"Empty file {file_path}, skipping")
                    skipped_count += 1
                    continue

                # Parse frontmatter and extract metadata
                frontmatter, body_content = parse_frontmatter(content)
                if not isinstance(frontmatter, dict):
                    # Only a mapping is usable as frontmatter below.
                    frontmatter = None
                wiki_links = extract_wiki_links(content)

                # Title comes from frontmatter when usable, else the filename;
                # tags are the union of inline and frontmatter tags.
                title, tags = _resolve_title_and_tags(
                    frontmatter, file_info["name"], extract_tags(content)
                )

                # Generate unique identifier using vault name and relative path
                unique_identifier = f"{vault_name}:{relative_path}"
                unique_identifier_hash = generate_unique_identifier_hash(
                    DocumentType.OBSIDIAN_CONNECTOR,
                    unique_identifier,
                    search_space_id,
                )

                # Check for existing document
                existing_document = await check_document_by_unique_identifier(
                    session, unique_identifier_hash
                )

                # Generate content hash
                # NOTE(review): if content_hash is unique per search space, two
                # notes with identical text would collide at commit - confirm.
                content_hash = generate_content_hash(content, search_space_id)

                # Nothing to do when the stored copy already matches
                if (
                    existing_document
                    and existing_document.content_hash == content_hash
                ):
                    logger.debug(f"Note {title} unchanged, skipping")
                    skipped_count += 1
                    continue

                # Build metadata
                document_metadata = {
                    "vault_name": vault_name,
                    "file_path": relative_path,
                    "tags": tags,
                    "outgoing_links": wiki_links,
                    # YAML may yield dates and other non-JSON values
                    "frontmatter": _json_safe(frontmatter),
                    "modified_at": file_info["modified_at"].isoformat(),
                    "created_at": file_info["created_at"].isoformat(),
                    "word_count": len(body_content.split()),
                }

                # Build document content with metadata
                metadata_sections = [
                    (
                        "METADATA",
                        [
                            f"Title: {title}",
                            f"Vault: {vault_name}",
                            f"Path: {relative_path}",
                            f"Tags: {', '.join(tags) if tags else 'None'}",
                            f"Links to: {', '.join(wiki_links) if wiki_links else 'None'}",
                        ],
                    ),
                    ("CONTENT", [body_content]),
                ]
                document_string = build_document_metadata_string(metadata_sections)

                if existing_document:
                    logger.info(f"Updating note: {title}")
                else:
                    logger.info(f"Indexing new note: {title}")

                # Do every step that can fail (summary, embedding, chunking)
                # BEFORE touching the stored document. Otherwise a failure
                # half-way through an update would leave a document with new
                # content but cleared chunks in the session, and the final
                # commit would persist that broken state.
                summary_content = ""
                if long_context_llm:
                    summary_content, _ = await generate_document_summary(
                        document_string,
                        long_context_llm,
                        document_metadata,
                    )

                embedding = config.embedding_model_instance.embed(document_string)

                # Add URL, summary and connector_id to metadata
                document_metadata["url"] = f"obsidian://{vault_name}/{relative_path}"
                document_metadata["summary"] = summary_content
                document_metadata["connector_id"] = connector_id

                chunks = await create_document_chunks(document_string)

                if existing_document:
                    # Keep the stored title in sync with frontmatter/filename
                    existing_document.title = title
                    existing_document.content = document_string
                    existing_document.content_hash = content_hash
                    existing_document.document_metadata = document_metadata
                    existing_document.embedding = embedding
                    existing_document.updated_at = get_current_timestamp()

                    # Update chunks - delete old and attach new
                    existing_document.chunks.clear()
                    existing_document.chunks = chunks
                else:
                    new_document = Document(
                        search_space_id=search_space_id,
                        title=title,
                        document_type=DocumentType.OBSIDIAN_CONNECTOR,
                        content=document_string,
                        content_hash=content_hash,
                        unique_identifier_hash=unique_identifier_hash,
                        document_metadata=document_metadata,
                        embedding=embedding,
                        chunks=chunks,
                        updated_at=get_current_timestamp(),
                    )
                    session.add(new_document)

                indexed_count += 1

            except Exception as e:
                # Best effort: one bad note must not abort the whole vault.
                logger.exception(
                    f"Error processing file {file_info.get('path', 'unknown')}: {e}"
                )
                skipped_count += 1
                continue

        # Update connector's last indexed timestamp
        await update_connector_last_indexed(session, connector, update_last_indexed)

        # Commit all changes
        await session.commit()

        await task_logger.log_task_success(
            log_entry,
            f"Successfully indexed {indexed_count} Obsidian notes (skipped {skipped_count})",
            {
                "indexed_count": indexed_count,
                "skipped_count": skipped_count,
                "total_files": len(files),
            },
        )

        return indexed_count, None

    except SQLAlchemyError as e:
        logger.exception(f"Database error during Obsidian indexing: {e}")
        await session.rollback()
        await task_logger.log_task_failure(
            log_entry,
            f"Database error during Obsidian indexing: {e}",
            "Database error",
            {"error_type": "SQLAlchemyError"},
        )
        return 0, f"Database error: {e}"

    except Exception as e:
        logger.exception(f"Error during Obsidian indexing: {e}")
        # Discard pending documents, mirroring the database-error path: the
        # function reports 0 indexed, so nothing from this run should be left
        # in the session for a later commit to persist.
        await session.rollback()
        await task_logger.log_task_failure(
            log_entry,
            f"Error during Obsidian indexing: {e}",
            "Unexpected error",
            {"error_type": type(e).__name__},
        )
        return 0, str(e)

View file

@ -46,12 +46,9 @@ dependencies = [
"boto3>=1.35.0",
"langchain-community>=0.3.31",
"langchain-unstructured>=1.0.0",
"langchain>=1.2.0",
"litellm>=1.80.10",
"langchain-litellm>=0.3.5",
"langgraph>=1.0.5",
"fake-useragent>=2.2.0",
"deepagents>=0.3.0",
"trafilatura>=2.0.0",
"fastapi-users[oauth,sqlalchemy]>=15.0.3",
"chonkie[all]>=1.5.0",
@ -62,6 +59,9 @@ dependencies = [
"sse-starlette>=3.1.1,<3.1.2",
"gitingest>=0.3.1",
"composio>=0.10.9",
"deepagents>=0.3.8",
"langchain>=1.2.6",
"langgraph>=1.0.5",
]
[dependency-groups]

View file

@ -195,7 +195,7 @@ wheels = [
[[package]]
name = "anthropic"
version = "0.75.0"
version = "0.76.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "anyio" },
@ -207,9 +207,9 @@ dependencies = [
{ name = "sniffio" },
{ name = "typing-extensions" },
]
sdist = { url = "https://files.pythonhosted.org/packages/04/1f/08e95f4b7e2d35205ae5dcbb4ae97e7d477fc521c275c02609e2931ece2d/anthropic-0.75.0.tar.gz", hash = "sha256:e8607422f4ab616db2ea5baacc215dd5f028da99ce2f022e33c7c535b29f3dfb", size = 439565 }
sdist = { url = "https://files.pythonhosted.org/packages/6e/be/d11abafaa15d6304826438170f7574d750218f49a106c54424a40cef4494/anthropic-0.76.0.tar.gz", hash = "sha256:e0cae6a368986d5cf6df743dfbb1b9519e6a9eee9c6c942ad8121c0b34416ffe", size = 495483 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/60/1c/1cd02b7ae64302a6e06724bf80a96401d5313708651d277b1458504a1730/anthropic-0.75.0-py3-none-any.whl", hash = "sha256:ea8317271b6c15d80225a9f3c670152746e88805a7a61e14d4a374577164965b", size = 388164 },
{ url = "https://files.pythonhosted.org/packages/e5/70/7b0fd9c1a738f59d3babe2b4212031c34ab7d0fda4ffef15b58a55c5bcea/anthropic-0.76.0-py3-none-any.whl", hash = "sha256:81efa3113901192af2f0fe977d3ec73fdadb1e691586306c4256cd6d5ccc331c", size = 390309 },
]
[[package]]
@ -1231,17 +1231,18 @@ wheels = [
[[package]]
name = "deepagents"
version = "0.3.0"
version = "0.3.8"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "langchain" },
{ name = "langchain-anthropic" },
{ name = "langchain-core" },
{ name = "langchain-google-genai" },
{ name = "wcmatch" },
]
sdist = { url = "https://files.pythonhosted.org/packages/cc/c7/d3c2840bd0e66b6cd5948aa69625e129328ad261308e18fcb9a9420709da/deepagents-0.3.0.tar.gz", hash = "sha256:3dd4d2ed53efb1ef78aeb1020a5696c0ec7e58e627b305a6665d33fe6fbdedff", size = 51387 }
sdist = { url = "https://files.pythonhosted.org/packages/47/69/d8dd80dd5c0c81393cc32623dd51e642c8607ab798276506b3b3e89b1f20/deepagents-0.3.8.tar.gz", hash = "sha256:4b8252f8deaad449ce39426cc2233a597ee079b9b690647a26d128c16d6c6eb8", size = 73956 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/22/e9/60bab7f37ff38bf982ea578e457ed1878ded613a3425462bcd07b00487e9/deepagents-0.3.0-py3-none-any.whl", hash = "sha256:9e23532d8d535dc2b0b4e0834453a1223a6a8f81b77947c0faf54537d05ce89a", size = 54065 },
{ url = "https://files.pythonhosted.org/packages/a6/6a/35968909bd3184eafee97326bcfd16c99bf9b0e03aadb3327eaf7229ea11/deepagents-0.3.8-py3-none-any.whl", hash = "sha256:7c76205dc014173d795402045b51505517954c7c4a508175f8e4a529f51928cc", size = 79161 },
]
[[package]]
@ -1992,9 +1993,9 @@ dependencies = [
{ name = "starlette" },
{ name = "tiktoken" },
]
sdist = { url = "https://files.pythonhosted.org/packages/d6/fe/a915f0c32a3d7920206a677f73c185b3eadf4ec151fb05aedd52e64713f7/gitingest-0.3.1.tar.gz", hash = "sha256:4587cab873d4e08bdb16d612bb153c23e0ce59771a1d57a438239c5e39f05ebf", size = 70681, upload-time = "2025-07-31T13:56:19.845Z" }
sdist = { url = "https://files.pythonhosted.org/packages/d6/fe/a915f0c32a3d7920206a677f73c185b3eadf4ec151fb05aedd52e64713f7/gitingest-0.3.1.tar.gz", hash = "sha256:4587cab873d4e08bdb16d612bb153c23e0ce59771a1d57a438239c5e39f05ebf", size = 70681 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/00/15/f200ab2e73287e67d1dce6fbacf421552ae9fbafdc5f0cc8dd0d2fe4fc47/gitingest-0.3.1-py3-none-any.whl", hash = "sha256:8143a5e6a7140ede9f680e13d3931ac07c82ac9bd8bab9ad1fba017c8c1e8666", size = 68343, upload-time = "2025-07-31T13:56:17.729Z" },
{ url = "https://files.pythonhosted.org/packages/00/15/f200ab2e73287e67d1dce6fbacf421552ae9fbafdc5f0cc8dd0d2fe4fc47/gitingest-0.3.1-py3-none-any.whl", hash = "sha256:8143a5e6a7140ede9f680e13d3931ac07c82ac9bd8bab9ad1fba017c8c1e8666", size = 68343 },
]
[[package]]
@ -2916,30 +2917,30 @@ wheels = [
[[package]]
name = "langchain"
version = "1.2.0"
version = "1.2.6"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "langchain-core" },
{ name = "langgraph" },
{ name = "pydantic" },
]
sdist = { url = "https://files.pythonhosted.org/packages/b1/12/3a74c22abdfddd877dfc2ee666d516f9132877fcd25eb4dd694835c59c79/langchain-1.2.0.tar.gz", hash = "sha256:a087d1e2b2969819e29a91a6d5f98302aafe31bd49ba377ecee3bf5a5dcfe14a", size = 536126 }
sdist = { url = "https://files.pythonhosted.org/packages/f5/bc/d8f506a525baadee99a65c6cc28c1c35c9eaf1cb2009f048e9861d81a600/langchain-1.2.6.tar.gz", hash = "sha256:7d46cbf719d860a16f6fc182d5d3de17453dda187f3d43e9c40ac352a5094fdd", size = 553127 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/23/00/4e3fa0d90f5a5c376ccb8ca983d0f0f7287783dfac48702e18f01d24673b/langchain-1.2.0-py3-none-any.whl", hash = "sha256:82f0d17aa4fbb11560b30e1e7d4aeb75e3ad71ce09b85c90ab208b181a24ffac", size = 102828 },
{ url = "https://files.pythonhosted.org/packages/3f/28/d5dc4cb06ccb29d62a590d446072964766555e85863f5044c6e644c07d0d/langchain-1.2.6-py3-none-any.whl", hash = "sha256:a9a6c39f03c09b6eb0f1b47e267ad2a2fd04e124dfaa9753bd6c11d2fe7d944e", size = 108458 },
]
[[package]]
name = "langchain-anthropic"
version = "1.3.0"
version = "1.3.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "anthropic" },
{ name = "langchain-core" },
{ name = "pydantic" },
]
sdist = { url = "https://files.pythonhosted.org/packages/de/50/cc3b3e0410d86de457d7a100dde763fc1c33c4ce884e883659aa4cf95538/langchain_anthropic-1.3.0.tar.gz", hash = "sha256:497a937ee0310c588196bff37f39f02d43d87bff3a12d16278bdbc3bd0e9a80b", size = 707207 }
sdist = { url = "https://files.pythonhosted.org/packages/0d/b6/ac5ee84e15bf79844c9c791f99a614c7ec7e1a63c2947e55977be01a81b4/langchain_anthropic-1.3.1.tar.gz", hash = "sha256:4f3d7a4a7729ab1aeaf62d32c87d4d227c1b5421668ca9e3734562b383470b07", size = 708940 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/29/ca/0725bc347a9c226da9d76f85bf7d03115caec7dbc87876af68579c4ab24e/langchain_anthropic-1.3.0-py3-none-any.whl", hash = "sha256:3823560e1df15d6082636baa04f87cb59052ba70aada0eba381c4679b1ce0eba", size = 45724 },
{ url = "https://files.pythonhosted.org/packages/9a/4f/7a5b32764addf4b757545b89899b9d76688176f19e4ee89868e3b8bbfd0f/langchain_anthropic-1.3.1-py3-none-any.whl", hash = "sha256:1fc28cf8037c30597ee6172fc2ff9e345efe8149a8c2a39897b1eebba2948322", size = 46328 },
]
[[package]]
@ -2967,7 +2968,7 @@ wheels = [
[[package]]
name = "langchain-core"
version = "1.2.1"
version = "1.2.7"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "jsonpatch" },
@ -2979,9 +2980,24 @@ dependencies = [
{ name = "typing-extensions" },
{ name = "uuid-utils" },
]
sdist = { url = "https://files.pythonhosted.org/packages/f5/a0/2177f4ef4dfbea8edeba377b7b4889d177b8356ce186640e4651b240fd4d/langchain_core-1.2.1.tar.gz", hash = "sha256:131e6ad105b47ec2adc4d4d973f569276688f48cd890ba44603d48e76d9993ce", size = 802986 }
sdist = { url = "https://files.pythonhosted.org/packages/a2/0e/664d8d81b3493e09cbab72448d2f9d693d1fa5aa2bcc488602203a9b6da0/langchain_core-1.2.7.tar.gz", hash = "sha256:e1460639f96c352b4a41c375f25aeb8d16ffc1769499fb1c20503aad59305ced", size = 837039 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/cc/95/98c47dbb4b6098934ff70e0f52efef3a85505dbcccc9eb63587e21fde4c9/langchain_core-1.2.1-py3-none-any.whl", hash = "sha256:2f63859f85dc3d95f768e35fed605702e3ff5aa3e92c7b253103119613e79768", size = 475972 },
{ url = "https://files.pythonhosted.org/packages/6e/6f/34a9fba14d191a67f7e2ee3dbce3e9b86d2fa7310e2c7f2c713583481bd2/langchain_core-1.2.7-py3-none-any.whl", hash = "sha256:452f4fef7a3d883357b22600788d37e3d8854ef29da345b7ac7099f33c31828b", size = 490232 },
]
[[package]]
name = "langchain-google-genai"
version = "4.2.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "filetype" },
{ name = "google-genai" },
{ name = "langchain-core" },
{ name = "pydantic" },
]
sdist = { url = "https://files.pythonhosted.org/packages/d8/0b/eae2305e207574dc633983a8a82a745e0ede1bce1f3a9daff24d2341fadc/langchain_google_genai-4.2.0.tar.gz", hash = "sha256:9a8d9bfc35354983ed29079cefff53c3e7c9c2a44b6ba75cc8f13a0cf8b55c33", size = 277361 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/22/51/39942c0083139652494bb354dddf0ed397703a4882302f7b48aeca531c96/langchain_google_genai-4.2.0-py3-none-any.whl", hash = "sha256:856041aaafceff65a4ef0d5acf5731f2db95229ff041132af011aec51e8279d9", size = 66452 },
]
[[package]]
@ -4516,9 +4532,9 @@ wheels = [
name = "pathspec"
version = "1.0.3"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/4c/b2/bb8e495d5262bfec41ab5cb18f522f1012933347fb5d9e62452d446baca2/pathspec-1.0.3.tar.gz", hash = "sha256:bac5cf97ae2c2876e2d25ebb15078eb04d76e4b98921ee31c6f85ade8b59444d", size = 130841, upload-time = "2026-01-09T15:46:46.009Z" }
sdist = { url = "https://files.pythonhosted.org/packages/4c/b2/bb8e495d5262bfec41ab5cb18f522f1012933347fb5d9e62452d446baca2/pathspec-1.0.3.tar.gz", hash = "sha256:bac5cf97ae2c2876e2d25ebb15078eb04d76e4b98921ee31c6f85ade8b59444d", size = 130841 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/32/2b/121e912bd60eebd623f873fd090de0e84f322972ab25a7f9044c056804ed/pathspec-1.0.3-py3-none-any.whl", hash = "sha256:e80767021c1cc524aa3fb14bedda9c34406591343cc42797b386ce7b9354fb6c", size = 55021, upload-time = "2026-01-09T15:46:44.652Z" },
{ url = "https://files.pythonhosted.org/packages/32/2b/121e912bd60eebd623f873fd090de0e84f322972ab25a7f9044c056804ed/pathspec-1.0.3-py3-none-any.whl", hash = "sha256:e80767021c1cc524aa3fb14bedda9c34406591343cc42797b386ce7b9354fb6c", size = 55021 },
]
[[package]]
@ -6606,7 +6622,7 @@ requires-dist = [
{ name = "chonkie", extras = ["all"], specifier = ">=1.5.0" },
{ name = "composio", specifier = ">=0.10.9" },
{ name = "datasets", specifier = ">=2.21.0" },
{ name = "deepagents", specifier = ">=0.3.0" },
{ name = "deepagents", specifier = ">=0.3.8" },
{ name = "discord-py", specifier = ">=2.5.2" },
{ name = "docling", specifier = ">=2.15.0" },
{ name = "elasticsearch", specifier = ">=9.1.1" },
@ -6622,7 +6638,7 @@ requires-dist = [
{ name = "google-api-python-client", specifier = ">=2.156.0" },
{ name = "google-auth-oauthlib", specifier = ">=1.2.1" },
{ name = "kokoro", specifier = ">=0.9.4" },
{ name = "langchain", specifier = ">=1.2.0" },
{ name = "langchain", specifier = ">=1.2.6" },
{ name = "langchain-community", specifier = ">=0.3.31" },
{ name = "langchain-litellm", specifier = ">=0.3.5" },
{ name = "langchain-unstructured", specifier = ">=1.0.0" },