mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-06-16 21:05:20 +02:00
Merge remote-tracking branch 'upstream/dev' into feat/composio
This commit is contained in:
commit
fae52345f8
65 changed files with 3291 additions and 153 deletions
29
surfsense_backend/alembic/versions/74_no_op.py
Normal file
29
surfsense_backend/alembic/versions/74_no_op.py
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
"""No-op migration for Composio support
|
||||
|
||||
Revision ID: 74
|
||||
Revises: 73
|
||||
Create Date: 2026-01-21
|
||||
|
||||
NOTE: This migration is a no-op since Composio is not supported yet.
|
||||
"""
|
||||
|
||||
from collections.abc import Sequence
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision: str = "74"
|
||||
down_revision: str | None = "73"
|
||||
branch_labels: str | Sequence[str] | None = None
|
||||
depends_on: str | Sequence[str] | None = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
"""No-op upgrade for Composio support."""
|
||||
pass
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
"""No-op downgrade for Composio support.
|
||||
|
||||
Note: PostgreSQL does not support removing enum values directly.
|
||||
"""
|
||||
pass
|
||||
|
|
@ -0,0 +1,75 @@
|
|||
"""Add chat_session_state table for live collaboration
|
||||
|
||||
Revision ID: 75
|
||||
Revises: 74
|
||||
|
||||
Creates chat_session_state table to track AI responding state per thread.
|
||||
Enables real-time sync via Electric SQL for shared chat collaboration.
|
||||
"""
|
||||
|
||||
from collections.abc import Sequence
|
||||
|
||||
from alembic import op
|
||||
|
||||
revision: str = "75"
|
||||
down_revision: str | None = "74"
|
||||
branch_labels: str | Sequence[str] | None = None
|
||||
depends_on: str | Sequence[str] | None = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
"""Create chat_session_state table with Electric SQL replication."""
|
||||
op.execute(
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS chat_session_state (
|
||||
id SERIAL PRIMARY KEY,
|
||||
thread_id INTEGER NOT NULL REFERENCES new_chat_threads(id) ON DELETE CASCADE,
|
||||
ai_responding_to_user_id UUID REFERENCES "user"(id) ON DELETE SET NULL,
|
||||
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||
UNIQUE (thread_id)
|
||||
)
|
||||
"""
|
||||
)
|
||||
|
||||
op.execute(
|
||||
"CREATE INDEX IF NOT EXISTS idx_chat_session_state_thread_id ON chat_session_state(thread_id)"
|
||||
)
|
||||
|
||||
op.execute("ALTER TABLE chat_session_state REPLICA IDENTITY FULL;")
|
||||
|
||||
op.execute(
|
||||
"""
|
||||
DO $$
|
||||
BEGIN
|
||||
IF NOT EXISTS (
|
||||
SELECT 1 FROM pg_publication_tables
|
||||
WHERE pubname = 'electric_publication_default'
|
||||
AND tablename = 'chat_session_state'
|
||||
) THEN
|
||||
ALTER PUBLICATION electric_publication_default ADD TABLE chat_session_state;
|
||||
END IF;
|
||||
END
|
||||
$$;
|
||||
"""
|
||||
)
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
"""Drop chat_session_state table and remove from Electric SQL replication."""
|
||||
op.execute(
|
||||
"""
|
||||
DO $$
|
||||
BEGIN
|
||||
IF EXISTS (
|
||||
SELECT 1 FROM pg_publication_tables
|
||||
WHERE pubname = 'electric_publication_default'
|
||||
AND tablename = 'chat_session_state'
|
||||
) THEN
|
||||
ALTER PUBLICATION electric_publication_default DROP TABLE chat_session_state;
|
||||
END IF;
|
||||
END
|
||||
$$;
|
||||
"""
|
||||
)
|
||||
|
||||
op.execute("DROP TABLE IF EXISTS chat_session_state;")
|
||||
|
|
@ -0,0 +1,99 @@
|
|||
"""Add live collaboration tables to Electric SQL publication
|
||||
|
||||
Revision ID: 76
|
||||
Revises: 75
|
||||
|
||||
Enables real-time sync for live collaboration features:
|
||||
- new_chat_messages: Live message sync between users
|
||||
- chat_comments: Live comment updates
|
||||
|
||||
Note: User/member info is fetched via API (membersAtom) for client-side joins,
|
||||
not via Electric SQL, to keep where clauses optimized and reduce complexity.
|
||||
"""
|
||||
|
||||
from collections.abc import Sequence
|
||||
|
||||
from alembic import op
|
||||
|
||||
revision: str = "76"
|
||||
down_revision: str | None = "75"
|
||||
branch_labels: str | Sequence[str] | None = None
|
||||
depends_on: str | Sequence[str] | None = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
"""Add live collaboration tables to Electric SQL replication."""
|
||||
# Set REPLICA IDENTITY FULL for Electric SQL sync
|
||||
op.execute("ALTER TABLE new_chat_messages REPLICA IDENTITY FULL;")
|
||||
op.execute("ALTER TABLE chat_comments REPLICA IDENTITY FULL;")
|
||||
|
||||
# Add new_chat_messages to Electric publication
|
||||
op.execute(
|
||||
"""
|
||||
DO $$
|
||||
BEGIN
|
||||
IF NOT EXISTS (
|
||||
SELECT 1 FROM pg_publication_tables
|
||||
WHERE pubname = 'electric_publication_default'
|
||||
AND tablename = 'new_chat_messages'
|
||||
) THEN
|
||||
ALTER PUBLICATION electric_publication_default ADD TABLE new_chat_messages;
|
||||
END IF;
|
||||
END
|
||||
$$;
|
||||
"""
|
||||
)
|
||||
|
||||
# Add chat_comments to Electric publication
|
||||
op.execute(
|
||||
"""
|
||||
DO $$
|
||||
BEGIN
|
||||
IF NOT EXISTS (
|
||||
SELECT 1 FROM pg_publication_tables
|
||||
WHERE pubname = 'electric_publication_default'
|
||||
AND tablename = 'chat_comments'
|
||||
) THEN
|
||||
ALTER PUBLICATION electric_publication_default ADD TABLE chat_comments;
|
||||
END IF;
|
||||
END
|
||||
$$;
|
||||
"""
|
||||
)
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
"""Remove live collaboration tables from Electric SQL replication."""
|
||||
op.execute(
|
||||
"""
|
||||
DO $$
|
||||
BEGIN
|
||||
IF EXISTS (
|
||||
SELECT 1 FROM pg_publication_tables
|
||||
WHERE pubname = 'electric_publication_default'
|
||||
AND tablename = 'new_chat_messages'
|
||||
) THEN
|
||||
ALTER PUBLICATION electric_publication_default DROP TABLE new_chat_messages;
|
||||
END IF;
|
||||
END
|
||||
$$;
|
||||
"""
|
||||
)
|
||||
|
||||
op.execute(
|
||||
"""
|
||||
DO $$
|
||||
BEGIN
|
||||
IF EXISTS (
|
||||
SELECT 1 FROM pg_publication_tables
|
||||
WHERE pubname = 'electric_publication_default'
|
||||
AND tablename = 'chat_comments'
|
||||
) THEN
|
||||
ALTER PUBLICATION electric_publication_default DROP TABLE chat_comments;
|
||||
END IF;
|
||||
END
|
||||
$$;
|
||||
"""
|
||||
)
|
||||
|
||||
# Note: Not reverting REPLICA IDENTITY as it doesn't harm normal operations
|
||||
|
|
@ -0,0 +1,70 @@
|
|||
"""Add thread_id to chat_comments for denormalized Electric subscriptions
|
||||
|
||||
This denormalization allows a single Electric SQL subscription per thread
|
||||
instead of one per message, significantly reducing connection overhead.
|
||||
|
||||
Revision ID: 77
|
||||
Revises: 76
|
||||
"""
|
||||
|
||||
from collections.abc import Sequence
|
||||
|
||||
from alembic import op
|
||||
|
||||
revision: str = "77"
|
||||
down_revision: str | None = "76"
|
||||
branch_labels: str | Sequence[str] | None = None
|
||||
depends_on: str | Sequence[str] | None = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
"""Add thread_id column to chat_comments and backfill from messages."""
|
||||
# Add the column (nullable initially for backfill)
|
||||
op.execute(
|
||||
"""
|
||||
ALTER TABLE chat_comments
|
||||
ADD COLUMN IF NOT EXISTS thread_id INTEGER;
|
||||
"""
|
||||
)
|
||||
|
||||
# Backfill thread_id from the related message
|
||||
op.execute(
|
||||
"""
|
||||
UPDATE chat_comments c
|
||||
SET thread_id = m.thread_id
|
||||
FROM new_chat_messages m
|
||||
WHERE c.message_id = m.id
|
||||
AND c.thread_id IS NULL;
|
||||
"""
|
||||
)
|
||||
|
||||
# Make it NOT NULL after backfill
|
||||
op.execute(
|
||||
"""
|
||||
ALTER TABLE chat_comments
|
||||
ALTER COLUMN thread_id SET NOT NULL;
|
||||
"""
|
||||
)
|
||||
|
||||
# Add FK constraint
|
||||
op.execute(
|
||||
"""
|
||||
ALTER TABLE chat_comments
|
||||
ADD CONSTRAINT fk_chat_comments_thread_id
|
||||
FOREIGN KEY (thread_id) REFERENCES new_chat_threads(id) ON DELETE CASCADE;
|
||||
"""
|
||||
)
|
||||
|
||||
# Add index for efficient Electric subscriptions by thread
|
||||
op.execute(
|
||||
"CREATE INDEX IF NOT EXISTS idx_chat_comments_thread_id ON chat_comments(thread_id)"
|
||||
)
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
"""Remove thread_id column from chat_comments."""
|
||||
op.execute("DROP INDEX IF EXISTS idx_chat_comments_thread_id")
|
||||
op.execute(
|
||||
"ALTER TABLE chat_comments DROP CONSTRAINT IF EXISTS fk_chat_comments_thread_id"
|
||||
)
|
||||
op.execute("ALTER TABLE chat_comments DROP COLUMN IF EXISTS thread_id")
|
||||
|
|
@ -0,0 +1,33 @@
|
|||
"""Add Obsidian connector enums
|
||||
|
||||
Revision ID: 78
|
||||
Revises: 77
|
||||
Create Date: 2026-01-21
|
||||
|
||||
"""
|
||||
|
||||
from collections.abc import Sequence
|
||||
|
||||
from alembic import op
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision: str = "78"
|
||||
down_revision: str | None = "77"
|
||||
branch_labels: str | Sequence[str] | None = None
|
||||
depends_on: str | Sequence[str] | None = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
# Add OBSIDIAN_CONNECTOR to documenttype enum
|
||||
op.execute("ALTER TYPE documenttype ADD VALUE IF NOT EXISTS 'OBSIDIAN_CONNECTOR'")
|
||||
|
||||
# Add OBSIDIAN_CONNECTOR to searchsourceconnectortype enum
|
||||
op.execute(
|
||||
"ALTER TYPE searchsourceconnectortype ADD VALUE IF NOT EXISTS 'OBSIDIAN_CONNECTOR'"
|
||||
)
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
# Note: PostgreSQL doesn't support removing enum values directly.
|
||||
# The values will remain in the enum type but won't be used.
|
||||
pass
|
||||
|
|
@ -1,8 +1,7 @@
|
|||
"""Add Composio connector types to SearchSourceConnectorType and DocumentType enums
|
||||
|
||||
Revision ID: 74
|
||||
Revises: 73
|
||||
Create Date: 2026-01-21
|
||||
Revision ID: 79
|
||||
Revises: 78
|
||||
|
||||
This migration adds the Composio connector enum values to both:
|
||||
- searchsourceconnectortype (for connector type tracking)
|
||||
|
|
@ -23,8 +22,8 @@ from collections.abc import Sequence
|
|||
from alembic import op
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision: str = "74"
|
||||
down_revision: str | None = "73"
|
||||
revision: str = "79"
|
||||
down_revision: str | None = "78"
|
||||
branch_labels: str | Sequence[str] | None = None
|
||||
depends_on: str | Sequence[str] | None = None
|
||||
|
||||
|
|
@ -34,6 +34,12 @@ You have access to the following tools:
|
|||
- Returns: Documentation content with chunk IDs for citations (prefixed with 'doc-', e.g., [citation:doc-123])
|
||||
|
||||
1. search_knowledge_base: Search the user's personal knowledge base for relevant information.
|
||||
- IMPORTANT: When searching for information (meetings, schedules, notes, tasks, etc.), ALWAYS search broadly
|
||||
across ALL sources first by omitting connectors_to_search. The user may store information in various places
|
||||
including calendar apps, note-taking apps (Obsidian, Notion), chat apps (Slack, Discord), and more.
|
||||
- Only narrow to specific connectors if the user explicitly asks (e.g., "check my Slack" or "in my calendar").
|
||||
- Personal notes in Obsidian, Notion, or NOTE often contain schedules, meeting times, reminders, and other
|
||||
important information that may not be in calendars.
|
||||
- Args:
|
||||
- query: The search query - be specific and include key terms
|
||||
- top_k: Number of results to retrieve (default: 10)
|
||||
|
|
@ -157,6 +163,13 @@ You have access to the following tools:
|
|||
stating "Based on your memory..." - integrate the context seamlessly.
|
||||
</tools>
|
||||
<tool_call_examples>
|
||||
- User: "What time is the team meeting today?"
|
||||
- Call: `search_knowledge_base(query="team meeting time today")` (searches ALL sources - calendar, notes, Obsidian, etc.)
|
||||
- DO NOT limit to just calendar - the info might be in notes!
|
||||
|
||||
- User: "When is my gym session?"
|
||||
- Call: `search_knowledge_base(query="gym session time schedule")` (searches ALL sources)
|
||||
|
||||
- User: "How do I install SurfSense?"
|
||||
- Call: `search_surfsense_docs(query="installation setup")`
|
||||
|
||||
|
|
@ -175,6 +188,12 @@ You have access to the following tools:
|
|||
- User: "What did I discuss on Slack last week about the React migration?"
|
||||
- Call: `search_knowledge_base(query="React migration", connectors_to_search=["SLACK_CONNECTOR"], start_date="YYYY-MM-DD", end_date="YYYY-MM-DD")`
|
||||
|
||||
- User: "Check my Obsidian notes for meeting notes"
|
||||
- Call: `search_knowledge_base(query="meeting notes", connectors_to_search=["OBSIDIAN_CONNECTOR"])`
|
||||
|
||||
- User: "What's in my Obsidian vault about project ideas?"
|
||||
- Call: `search_knowledge_base(query="project ideas", connectors_to_search=["OBSIDIAN_CONNECTOR"])`
|
||||
|
||||
- User: "Remember that I prefer TypeScript over JavaScript"
|
||||
- Call: `save_memory(content="User prefers TypeScript over JavaScript for development", category="preference")`
|
||||
|
||||
|
|
|
|||
|
|
@ -49,6 +49,7 @@ _ALL_CONNECTORS: list[str] = [
|
|||
"BOOKSTACK_CONNECTOR",
|
||||
"CRAWLED_URL",
|
||||
"CIRCLEBACK",
|
||||
"OBSIDIAN_CONNECTOR",
|
||||
]
|
||||
|
||||
|
||||
|
|
@ -508,6 +509,16 @@ async def search_knowledge_base_async(
|
|||
)
|
||||
all_documents.extend(chunks)
|
||||
|
||||
elif connector == "OBSIDIAN_CONNECTOR":
|
||||
_, chunks = await connector_service.search_obsidian(
|
||||
user_query=query,
|
||||
search_space_id=search_space_id,
|
||||
top_k=top_k,
|
||||
start_date=resolved_start_date,
|
||||
end_date=resolved_end_date,
|
||||
)
|
||||
all_documents.extend(chunks)
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error searching connector {connector}: {e}")
|
||||
continue
|
||||
|
|
@ -596,6 +607,7 @@ def create_search_knowledge_base_tool(
|
|||
- WEBCRAWLER_CONNECTOR: "Webpages indexed by SurfSense" (personally selected websites)
|
||||
- BOOKSTACK_CONNECTOR: "BookStack pages" (personal documentation)
|
||||
- CIRCLEBACK: "Circleback meeting notes, transcripts, and action items" (personal meeting records)
|
||||
- OBSIDIAN_CONNECTOR: "Obsidian vault notes and markdown files" (personal notes and knowledge management)
|
||||
|
||||
NOTE: `WEBCRAWLER_CONNECTOR` is mapped internally to the canonical document type `CRAWLED_URL`.
|
||||
|
||||
|
|
|
|||
|
|
@ -61,6 +61,21 @@ class Config:
|
|||
"FFmpeg is not installed on the system. Please install it to use the Surfsense Podcaster."
|
||||
)
|
||||
|
||||
# Deployment Mode (self-hosted or cloud)
|
||||
# self-hosted: Full access to local file system connectors (Obsidian, etc.)
|
||||
# cloud: Only cloud-based connectors available
|
||||
DEPLOYMENT_MODE = os.getenv("SURFSENSE_DEPLOYMENT_MODE", "self-hosted")
|
||||
|
||||
@classmethod
|
||||
def is_self_hosted(cls) -> bool:
|
||||
"""Check if running in self-hosted mode."""
|
||||
return cls.DEPLOYMENT_MODE == "self-hosted"
|
||||
|
||||
@classmethod
|
||||
def is_cloud(cls) -> bool:
|
||||
"""Check if running in cloud mode."""
|
||||
return cls.DEPLOYMENT_MODE == "cloud"
|
||||
|
||||
# Database
|
||||
DATABASE_URL = os.getenv("DATABASE_URL")
|
||||
|
||||
|
|
|
|||
|
|
@ -53,6 +53,7 @@ class DocumentType(str, Enum):
|
|||
ELASTICSEARCH_CONNECTOR = "ELASTICSEARCH_CONNECTOR"
|
||||
BOOKSTACK_CONNECTOR = "BOOKSTACK_CONNECTOR"
|
||||
CIRCLEBACK = "CIRCLEBACK"
|
||||
OBSIDIAN_CONNECTOR = "OBSIDIAN_CONNECTOR"
|
||||
NOTE = "NOTE"
|
||||
COMPOSIO_GOOGLE_DRIVE_CONNECTOR = "COMPOSIO_GOOGLE_DRIVE_CONNECTOR"
|
||||
COMPOSIO_GMAIL_CONNECTOR = "COMPOSIO_GMAIL_CONNECTOR"
|
||||
|
|
@ -83,6 +84,9 @@ class SearchSourceConnectorType(str, Enum):
|
|||
WEBCRAWLER_CONNECTOR = "WEBCRAWLER_CONNECTOR"
|
||||
BOOKSTACK_CONNECTOR = "BOOKSTACK_CONNECTOR"
|
||||
CIRCLEBACK_CONNECTOR = "CIRCLEBACK_CONNECTOR"
|
||||
OBSIDIAN_CONNECTOR = (
|
||||
"OBSIDIAN_CONNECTOR" # Self-hosted only - Local Obsidian vault indexing
|
||||
)
|
||||
MCP_CONNECTOR = "MCP_CONNECTOR" # Model Context Protocol - User-defined API tools
|
||||
COMPOSIO_GOOGLE_DRIVE_CONNECTOR = "COMPOSIO_GOOGLE_DRIVE_CONNECTOR"
|
||||
COMPOSIO_GMAIL_CONNECTOR = "COMPOSIO_GMAIL_CONNECTOR"
|
||||
|
|
@ -419,6 +423,13 @@ class ChatComment(BaseModel, TimestampMixin):
|
|||
nullable=False,
|
||||
index=True,
|
||||
)
|
||||
# Denormalized thread_id for efficient Electric SQL subscriptions (one per thread)
|
||||
thread_id = Column(
|
||||
Integer,
|
||||
ForeignKey("new_chat_threads.id", ondelete="CASCADE"),
|
||||
nullable=False,
|
||||
index=True,
|
||||
)
|
||||
parent_id = Column(
|
||||
Integer,
|
||||
ForeignKey("chat_comments.id", ondelete="CASCADE"),
|
||||
|
|
@ -442,6 +453,7 @@ class ChatComment(BaseModel, TimestampMixin):
|
|||
|
||||
# Relationships
|
||||
message = relationship("NewChatMessage", back_populates="comments")
|
||||
thread = relationship("NewChatThread")
|
||||
author = relationship("User")
|
||||
parent = relationship(
|
||||
"ChatComment", remote_side="ChatComment.id", backref="replies"
|
||||
|
|
@ -478,6 +490,38 @@ class ChatCommentMention(BaseModel, TimestampMixin):
|
|||
mentioned_user = relationship("User")
|
||||
|
||||
|
||||
class ChatSessionState(BaseModel):
|
||||
"""
|
||||
Tracks real-time session state for shared chat collaboration.
|
||||
One record per thread, synced via Electric SQL.
|
||||
"""
|
||||
|
||||
__tablename__ = "chat_session_state"
|
||||
|
||||
thread_id = Column(
|
||||
Integer,
|
||||
ForeignKey("new_chat_threads.id", ondelete="CASCADE"),
|
||||
nullable=False,
|
||||
unique=True,
|
||||
index=True,
|
||||
)
|
||||
ai_responding_to_user_id = Column(
|
||||
UUID(as_uuid=True),
|
||||
ForeignKey("user.id", ondelete="SET NULL"),
|
||||
nullable=True,
|
||||
index=True,
|
||||
)
|
||||
updated_at = Column(
|
||||
TIMESTAMP(timezone=True),
|
||||
nullable=False,
|
||||
default=lambda: datetime.now(UTC),
|
||||
onupdate=lambda: datetime.now(UTC),
|
||||
)
|
||||
|
||||
thread = relationship("NewChatThread")
|
||||
ai_responding_to_user = relationship("User")
|
||||
|
||||
|
||||
class MemoryCategory(str, Enum):
|
||||
"""Categories for user memories."""
|
||||
|
||||
|
|
|
|||
|
|
@ -990,7 +990,7 @@ async def handle_new_chat(
|
|||
search_space_id=request.search_space_id,
|
||||
chat_id=request.chat_id,
|
||||
session=session,
|
||||
user_id=str(user.id), # Pass user ID for memory tools
|
||||
user_id=str(user.id), # Pass user ID for memory tools and session state
|
||||
llm_config_id=llm_config_id,
|
||||
attachments=request.attachments,
|
||||
mentioned_document_ids=request.mentioned_document_ids,
|
||||
|
|
|
|||
|
|
@ -901,6 +901,25 @@ async def index_connector_content(
|
|||
)
|
||||
response_message = "Web page indexing started in the background."
|
||||
|
||||
elif connector.connector_type == SearchSourceConnectorType.OBSIDIAN_CONNECTOR:
|
||||
from app.config import config as app_config
|
||||
from app.tasks.celery_tasks.connector_tasks import index_obsidian_vault_task
|
||||
|
||||
# Obsidian connector only available in self-hosted mode
|
||||
if not app_config.is_self_hosted():
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail="Obsidian connector is only available in self-hosted mode",
|
||||
)
|
||||
|
||||
logger.info(
|
||||
f"Triggering Obsidian vault indexing for connector {connector_id} into search space {search_space_id} from {indexing_from} to {indexing_to}"
|
||||
)
|
||||
index_obsidian_vault_task.delay(
|
||||
connector_id, search_space_id, str(user.id), indexing_from, indexing_to
|
||||
)
|
||||
response_message = "Obsidian vault indexing started in the background."
|
||||
|
||||
elif (
|
||||
connector.connector_type
|
||||
== SearchSourceConnectorType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR
|
||||
|
|
@ -2195,6 +2214,58 @@ async def run_bookstack_indexing(
|
|||
)
|
||||
|
||||
|
||||
# Add new helper functions for Obsidian indexing
|
||||
async def run_obsidian_indexing_with_new_session(
|
||||
connector_id: int,
|
||||
search_space_id: int,
|
||||
user_id: str,
|
||||
start_date: str,
|
||||
end_date: str,
|
||||
):
|
||||
"""Wrapper to run Obsidian indexing with its own database session."""
|
||||
logger.info(
|
||||
f"Background task started: Indexing Obsidian connector {connector_id} into space {search_space_id} from {start_date} to {end_date}"
|
||||
)
|
||||
async with async_session_maker() as session:
|
||||
await run_obsidian_indexing(
|
||||
session, connector_id, search_space_id, user_id, start_date, end_date
|
||||
)
|
||||
logger.info(f"Background task finished: Indexing Obsidian connector {connector_id}")
|
||||
|
||||
|
||||
async def run_obsidian_indexing(
|
||||
session: AsyncSession,
|
||||
connector_id: int,
|
||||
search_space_id: int,
|
||||
user_id: str,
|
||||
start_date: str,
|
||||
end_date: str,
|
||||
):
|
||||
"""
|
||||
Background task to run Obsidian vault indexing.
|
||||
|
||||
Args:
|
||||
session: Database session
|
||||
connector_id: ID of the Obsidian connector
|
||||
search_space_id: ID of the search space
|
||||
user_id: ID of the user
|
||||
start_date: Start date for indexing
|
||||
end_date: End date for indexing
|
||||
"""
|
||||
from app.tasks.connector_indexers import index_obsidian_vault
|
||||
|
||||
await _run_indexing_with_notifications(
|
||||
session=session,
|
||||
connector_id=connector_id,
|
||||
search_space_id=search_space_id,
|
||||
user_id=user_id,
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
indexing_function=index_obsidian_vault,
|
||||
update_timestamp_func=_update_connector_timestamp_by_id,
|
||||
)
|
||||
|
||||
|
||||
async def run_composio_indexing_with_new_session(
|
||||
connector_id: int,
|
||||
search_space_id: int,
|
||||
|
|
|
|||
29
surfsense_backend/app/schemas/chat_session_state.py
Normal file
29
surfsense_backend/app/schemas/chat_session_state.py
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
"""
|
||||
Pydantic schemas for chat session state (live collaboration).
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
from uuid import UUID
|
||||
|
||||
from pydantic import BaseModel, ConfigDict
|
||||
|
||||
|
||||
class RespondingUser(BaseModel):
|
||||
"""The user that the AI is currently responding to."""
|
||||
|
||||
id: UUID
|
||||
display_name: str | None = None
|
||||
email: str
|
||||
|
||||
model_config = ConfigDict(from_attributes=True)
|
||||
|
||||
|
||||
class ChatSessionStateResponse(BaseModel):
|
||||
"""Current session state for a chat thread."""
|
||||
|
||||
id: int
|
||||
thread_id: int
|
||||
responding_to: RespondingUser | None = None
|
||||
updated_at: datetime
|
||||
|
||||
model_config = ConfigDict(from_attributes=True)
|
||||
59
surfsense_backend/app/schemas/obsidian_auth_credentials.py
Normal file
59
surfsense_backend/app/schemas/obsidian_auth_credentials.py
Normal file
|
|
@ -0,0 +1,59 @@
|
|||
"""
|
||||
Obsidian Connector Credentials Schema.
|
||||
|
||||
Obsidian is a local-first note-taking app that stores notes as markdown files.
|
||||
This connector supports indexing from local file system (self-hosted only).
|
||||
"""
|
||||
|
||||
from pydantic import BaseModel, field_validator
|
||||
|
||||
|
||||
class ObsidianAuthCredentialsBase(BaseModel):
|
||||
"""
|
||||
Credentials/configuration for the Obsidian connector.
|
||||
|
||||
Since Obsidian vaults are local directories, this schema primarily
|
||||
holds the vault path and configuration options rather than API tokens.
|
||||
"""
|
||||
|
||||
vault_path: str
|
||||
vault_name: str | None = None
|
||||
exclude_folders: list[str] | None = None
|
||||
include_attachments: bool = False
|
||||
|
||||
@field_validator("vault_path")
|
||||
@classmethod
|
||||
def validate_vault_path(cls, v: str) -> str:
|
||||
"""Ensure vault path is provided and stripped of whitespace."""
|
||||
if not v or not v.strip():
|
||||
raise ValueError("Vault path is required")
|
||||
return v.strip()
|
||||
|
||||
@field_validator("exclude_folders", mode="before")
|
||||
@classmethod
|
||||
def parse_exclude_folders(cls, v):
|
||||
"""Parse exclude_folders from string if needed."""
|
||||
if v is None:
|
||||
return [".trash", ".obsidian", "templates"]
|
||||
if isinstance(v, str):
|
||||
return [f.strip() for f in v.split(",") if f.strip()]
|
||||
return v
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
"""Convert credentials to dictionary for storage."""
|
||||
return {
|
||||
"vault_path": self.vault_path,
|
||||
"vault_name": self.vault_name,
|
||||
"exclude_folders": self.exclude_folders,
|
||||
"include_attachments": self.include_attachments,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: dict) -> "ObsidianAuthCredentialsBase":
|
||||
"""Create credentials from dictionary."""
|
||||
return cls(
|
||||
vault_path=data.get("vault_path", ""),
|
||||
vault_name=data.get("vault_name"),
|
||||
exclude_folders=data.get("exclude_folders"),
|
||||
include_attachments=data.get("include_attachments", False),
|
||||
)
|
||||
|
|
@ -281,8 +281,10 @@ async def create_comment(
|
|||
detail="You don't have permission to create comments in this search space",
|
||||
)
|
||||
|
||||
thread = message.thread
|
||||
comment = ChatComment(
|
||||
message_id=message_id,
|
||||
thread_id=thread.id, # Denormalized for efficient Electric subscriptions
|
||||
author_id=user.id,
|
||||
content=content,
|
||||
)
|
||||
|
|
@ -299,7 +301,6 @@ async def create_comment(
|
|||
user_names = await get_user_names_for_mentions(session, set(mentions_map.keys()))
|
||||
|
||||
# Create notifications for mentioned users (excluding author)
|
||||
thread = message.thread
|
||||
author_name = user.display_name or user.email
|
||||
content_preview = render_mentions(content, user_names)
|
||||
for mentioned_user_id, mention_id in mentions_map.items():
|
||||
|
|
@ -393,8 +394,10 @@ async def create_reply(
|
|||
detail="You don't have permission to create comments in this search space",
|
||||
)
|
||||
|
||||
thread = parent_comment.message.thread
|
||||
reply = ChatComment(
|
||||
message_id=parent_comment.message_id,
|
||||
thread_id=thread.id, # Denormalized for efficient Electric subscriptions
|
||||
parent_id=comment_id,
|
||||
author_id=user.id,
|
||||
content=content,
|
||||
|
|
@ -412,7 +415,6 @@ async def create_reply(
|
|||
user_names = await get_user_names_for_mentions(session, set(mentions_map.keys()))
|
||||
|
||||
# Create notifications for mentioned users (excluding author)
|
||||
thread = parent_comment.message.thread
|
||||
author_name = user.display_name or user.email
|
||||
content_preview = render_mentions(content, user_names)
|
||||
for mentioned_user_id, mention_id in mentions_map.items():
|
||||
|
|
|
|||
65
surfsense_backend/app/services/chat_session_state_service.py
Normal file
65
surfsense_backend/app/services/chat_session_state_service.py
Normal file
|
|
@ -0,0 +1,65 @@
|
|||
"""
|
||||
Service layer for chat session state (live collaboration).
|
||||
"""
|
||||
|
||||
from datetime import UTC, datetime
|
||||
from uuid import UUID
|
||||
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.dialects.postgresql import insert
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy.orm import selectinload
|
||||
|
||||
from app.db import ChatSessionState
|
||||
|
||||
|
||||
async def get_session_state(
|
||||
session: AsyncSession,
|
||||
thread_id: int,
|
||||
) -> ChatSessionState | None:
|
||||
"""Get the current session state for a thread."""
|
||||
result = await session.execute(
|
||||
select(ChatSessionState)
|
||||
.options(selectinload(ChatSessionState.ai_responding_to_user))
|
||||
.filter(ChatSessionState.thread_id == thread_id)
|
||||
)
|
||||
return result.scalar_one_or_none()
|
||||
|
||||
|
||||
async def set_ai_responding(
|
||||
session: AsyncSession,
|
||||
thread_id: int,
|
||||
user_id: UUID,
|
||||
) -> ChatSessionState:
|
||||
"""Mark AI as responding to a specific user. Uses upsert for atomicity."""
|
||||
now = datetime.now(UTC)
|
||||
upsert_query = insert(ChatSessionState).values(
|
||||
thread_id=thread_id,
|
||||
ai_responding_to_user_id=user_id,
|
||||
updated_at=now,
|
||||
)
|
||||
upsert_query = upsert_query.on_conflict_do_update(
|
||||
index_elements=["thread_id"],
|
||||
set_={
|
||||
"ai_responding_to_user_id": user_id,
|
||||
"updated_at": now,
|
||||
},
|
||||
)
|
||||
await session.execute(upsert_query)
|
||||
await session.commit()
|
||||
|
||||
return await get_session_state(session, thread_id)
|
||||
|
||||
|
||||
async def clear_ai_responding(
|
||||
session: AsyncSession,
|
||||
thread_id: int,
|
||||
) -> ChatSessionState | None:
|
||||
"""Clear AI responding state when response is complete."""
|
||||
state = await get_session_state(session, thread_id)
|
||||
if state:
|
||||
state.ai_responding_to_user_id = None
|
||||
state.updated_at = datetime.now(UTC)
|
||||
await session.commit()
|
||||
await session.refresh(state)
|
||||
return state
|
||||
|
|
@ -2780,3 +2780,94 @@ class ConnectorService:
|
|||
}
|
||||
|
||||
return result_object, circleback_docs
|
||||
|
||||
async def search_obsidian(
|
||||
self,
|
||||
user_query: str,
|
||||
search_space_id: int,
|
||||
top_k: int = 20,
|
||||
start_date: datetime | None = None,
|
||||
end_date: datetime | None = None,
|
||||
) -> tuple:
|
||||
"""
|
||||
Search for Obsidian vault notes and return both the source information and langchain documents.
|
||||
|
||||
Uses combined chunk-level and document-level hybrid search with RRF fusion.
|
||||
|
||||
Args:
|
||||
user_query: The user's query
|
||||
search_space_id: The search space ID to search in
|
||||
top_k: Maximum number of results to return
|
||||
start_date: Optional start date for filtering documents by updated_at
|
||||
end_date: Optional end date for filtering documents by updated_at
|
||||
|
||||
Returns:
|
||||
tuple: (sources_info, langchain_documents)
|
||||
"""
|
||||
obsidian_docs = await self._combined_rrf_search(
|
||||
query_text=user_query,
|
||||
search_space_id=search_space_id,
|
||||
document_type="OBSIDIAN_CONNECTOR",
|
||||
top_k=top_k,
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
)
|
||||
|
||||
# Early return if no results
|
||||
if not obsidian_docs:
|
||||
return {
|
||||
"id": 53,
|
||||
"name": "Obsidian Vault",
|
||||
"type": "OBSIDIAN_CONNECTOR",
|
||||
"sources": [],
|
||||
}, []
|
||||
|
||||
def _title_fn(doc_info: dict[str, Any], metadata: dict[str, Any]) -> str:
|
||||
return doc_info.get("title", "Untitled Note")
|
||||
|
||||
def _url_fn(doc_info: dict[str, Any], metadata: dict[str, Any]) -> str:
|
||||
# Obsidian URL format: obsidian://vault_name/path
|
||||
return doc_info.get("url", "")
|
||||
|
||||
def _description_fn(
|
||||
chunk: dict[str, Any], _doc_info: dict[str, Any], metadata: dict[str, Any]
|
||||
) -> str:
|
||||
description = self._chunk_preview(chunk.get("content", ""), limit=200)
|
||||
info_parts = []
|
||||
vault_name = metadata.get("vault_name")
|
||||
tags = metadata.get("tags", [])
|
||||
if vault_name:
|
||||
info_parts.append(f"Vault: {vault_name}")
|
||||
if tags and isinstance(tags, list) and len(tags) > 0:
|
||||
info_parts.append(f"Tags: {', '.join(tags[:3])}")
|
||||
if info_parts:
|
||||
description = (description + " | " + " | ".join(info_parts)).strip(" |")
|
||||
return description
|
||||
|
||||
def _extra_fields_fn(
|
||||
_chunk: dict[str, Any], _doc_info: dict[str, Any], metadata: dict[str, Any]
|
||||
) -> dict[str, Any]:
|
||||
return {
|
||||
"vault_name": metadata.get("vault_name", ""),
|
||||
"file_path": metadata.get("file_path", ""),
|
||||
"tags": metadata.get("tags", []),
|
||||
"outgoing_links": metadata.get("outgoing_links", []),
|
||||
}
|
||||
|
||||
sources_list = self._build_chunk_sources_from_documents(
|
||||
obsidian_docs,
|
||||
title_fn=_title_fn,
|
||||
url_fn=_url_fn,
|
||||
description_fn=_description_fn,
|
||||
extra_fields_fn=_extra_fields_fn,
|
||||
)
|
||||
|
||||
# Create result object
|
||||
result_object = {
|
||||
"id": 53,
|
||||
"name": "Obsidian Vault",
|
||||
"type": "OBSIDIAN_CONNECTOR",
|
||||
"sources": sources_list,
|
||||
}
|
||||
|
||||
return result_object, obsidian_docs
|
||||
|
|
|
|||
|
|
@ -623,6 +623,28 @@ class MentionNotificationHandler(BaseNotificationHandler):
|
|||
def __init__(self):
|
||||
super().__init__("new_mention")
|
||||
|
||||
async def find_notification_by_mention(
|
||||
self,
|
||||
session: AsyncSession,
|
||||
mention_id: int,
|
||||
) -> Notification | None:
|
||||
"""
|
||||
Find an existing notification by mention ID.
|
||||
|
||||
Args:
|
||||
session: Database session
|
||||
mention_id: The mention ID to search for
|
||||
|
||||
Returns:
|
||||
Notification if found, None otherwise
|
||||
"""
|
||||
query = select(Notification).where(
|
||||
Notification.type == self.notification_type,
|
||||
Notification.notification_metadata["mention_id"].astext == str(mention_id),
|
||||
)
|
||||
result = await session.execute(query)
|
||||
return result.scalar_one_or_none()
|
||||
|
||||
async def notify_new_mention(
|
||||
self,
|
||||
session: AsyncSession,
|
||||
|
|
@ -641,11 +663,12 @@ class MentionNotificationHandler(BaseNotificationHandler):
|
|||
) -> Notification:
|
||||
"""
|
||||
Create notification when a user is @mentioned in a comment.
|
||||
Uses mention_id for idempotency to prevent duplicate notifications.
|
||||
|
||||
Args:
|
||||
session: Database session
|
||||
mentioned_user_id: User who was mentioned
|
||||
mention_id: ID of the mention record
|
||||
mention_id: ID of the mention record (used for idempotency)
|
||||
comment_id: ID of the comment containing the mention
|
||||
message_id: ID of the message being commented on
|
||||
thread_id: ID of the chat thread
|
||||
|
|
@ -658,8 +681,16 @@ class MentionNotificationHandler(BaseNotificationHandler):
|
|||
search_space_id: Search space ID
|
||||
|
||||
Returns:
|
||||
Notification: The created notification
|
||||
Notification: The created or existing notification
|
||||
"""
|
||||
# Check if notification already exists for this mention (idempotency)
|
||||
existing = await self.find_notification_by_mention(session, mention_id)
|
||||
if existing:
|
||||
logger.info(
|
||||
f"Notification already exists for mention {mention_id}, returning existing"
|
||||
)
|
||||
return existing
|
||||
|
||||
title = f"{author_name} mentioned you"
|
||||
message = content_preview[:100] + ("..." if len(content_preview) > 100 else "")
|
||||
|
||||
|
|
@ -676,21 +707,37 @@ class MentionNotificationHandler(BaseNotificationHandler):
|
|||
"content_preview": content_preview[:200],
|
||||
}
|
||||
|
||||
notification = Notification(
|
||||
user_id=mentioned_user_id,
|
||||
search_space_id=search_space_id,
|
||||
type=self.notification_type,
|
||||
title=title,
|
||||
message=message,
|
||||
notification_metadata=metadata,
|
||||
)
|
||||
session.add(notification)
|
||||
await session.commit()
|
||||
await session.refresh(notification)
|
||||
logger.info(
|
||||
f"Created new_mention notification {notification.id} for user {mentioned_user_id}"
|
||||
)
|
||||
return notification
|
||||
try:
|
||||
notification = Notification(
|
||||
user_id=mentioned_user_id,
|
||||
search_space_id=search_space_id,
|
||||
type=self.notification_type,
|
||||
title=title,
|
||||
message=message,
|
||||
notification_metadata=metadata,
|
||||
)
|
||||
session.add(notification)
|
||||
await session.commit()
|
||||
await session.refresh(notification)
|
||||
logger.info(
|
||||
f"Created new_mention notification {notification.id} for user {mentioned_user_id}"
|
||||
)
|
||||
return notification
|
||||
except Exception as e:
|
||||
# Handle race condition - if duplicate key error, try to fetch existing
|
||||
await session.rollback()
|
||||
if (
|
||||
"duplicate key" in str(e).lower()
|
||||
or "unique constraint" in str(e).lower()
|
||||
):
|
||||
logger.warning(
|
||||
f"Duplicate notification detected for mention {mention_id}, fetching existing"
|
||||
)
|
||||
existing = await self.find_notification_by_mention(session, mention_id)
|
||||
if existing:
|
||||
return existing
|
||||
# Re-raise if not a duplicate key error or couldn't find existing
|
||||
raise
|
||||
|
||||
|
||||
class NotificationService:
|
||||
|
|
|
|||
|
|
@ -761,6 +761,49 @@ async def _index_bookstack_pages(
|
|||
)
|
||||
|
||||
|
||||
@celery_app.task(name="index_obsidian_vault", bind=True)
|
||||
def index_obsidian_vault_task(
|
||||
self,
|
||||
connector_id: int,
|
||||
search_space_id: int,
|
||||
user_id: str,
|
||||
start_date: str,
|
||||
end_date: str,
|
||||
):
|
||||
"""Celery task to index Obsidian vault notes."""
|
||||
import asyncio
|
||||
|
||||
loop = asyncio.new_event_loop()
|
||||
asyncio.set_event_loop(loop)
|
||||
|
||||
try:
|
||||
loop.run_until_complete(
|
||||
_index_obsidian_vault(
|
||||
connector_id, search_space_id, user_id, start_date, end_date
|
||||
)
|
||||
)
|
||||
finally:
|
||||
loop.close()
|
||||
|
||||
|
||||
async def _index_obsidian_vault(
|
||||
connector_id: int,
|
||||
search_space_id: int,
|
||||
user_id: str,
|
||||
start_date: str,
|
||||
end_date: str,
|
||||
):
|
||||
"""Index Obsidian vault with new session."""
|
||||
from app.routes.search_source_connectors_routes import (
|
||||
run_obsidian_indexing,
|
||||
)
|
||||
|
||||
async with get_celery_session_maker()() as session:
|
||||
await run_obsidian_indexing(
|
||||
session, connector_id, search_space_id, user_id, start_date, end_date
|
||||
)
|
||||
|
||||
|
||||
@celery_app.task(name="index_composio_connector", bind=True)
|
||||
def index_composio_connector_task(
|
||||
self,
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@ Supports loading LLM configurations from:
|
|||
|
||||
import json
|
||||
from collections.abc import AsyncGenerator
|
||||
from uuid import UUID
|
||||
|
||||
from langchain_core.messages import HumanMessage
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
|
@ -27,6 +28,10 @@ from app.agents.new_chat.llm_config import (
|
|||
)
|
||||
from app.db import Document, SurfsenseDocsDocument
|
||||
from app.schemas.new_chat import ChatAttachment
|
||||
from app.services.chat_session_state_service import (
|
||||
clear_ai_responding,
|
||||
set_ai_responding,
|
||||
)
|
||||
from app.services.connector_service import ConnectorService
|
||||
from app.services.new_streaming_service import VercelStreamingService
|
||||
|
||||
|
|
@ -167,9 +172,8 @@ async def stream_new_chat(
|
|||
search_space_id: The search space ID
|
||||
chat_id: The chat ID (used as LangGraph thread_id for memory)
|
||||
session: The database session
|
||||
user_id: The current user's UUID string (for memory tools)
|
||||
user_id: The current user's UUID string (for memory tools and session state)
|
||||
llm_config_id: The LLM configuration ID (default: -1 for first global config)
|
||||
messages: Optional chat history from frontend (list of ChatMessage)
|
||||
attachments: Optional attachments with extracted content
|
||||
mentioned_document_ids: Optional list of document IDs mentioned with @ in the chat
|
||||
mentioned_surfsense_doc_ids: Optional list of SurfSense doc IDs mentioned with @ in the chat
|
||||
|
|
@ -183,6 +187,9 @@ async def stream_new_chat(
|
|||
current_text_id: str | None = None
|
||||
|
||||
try:
|
||||
# Mark AI as responding to this user for live collaboration
|
||||
if user_id:
|
||||
await set_ai_responding(session, chat_id, UUID(user_id))
|
||||
# Load LLM config - supports both YAML (negative IDs) and database (positive IDs)
|
||||
agent_config: AgentConfig | None = None
|
||||
|
||||
|
|
@ -1147,3 +1154,7 @@ async def stream_new_chat(
|
|||
yield streaming_service.format_finish_step()
|
||||
yield streaming_service.format_finish()
|
||||
yield streaming_service.format_done()
|
||||
|
||||
finally:
|
||||
# Clear AI responding state for live collaboration
|
||||
await clear_ai_responding(session, chat_id)
|
||||
|
|
|
|||
|
|
@ -46,6 +46,7 @@ from .luma_indexer import index_luma_events
|
|||
|
||||
# Documentation and knowledge management
|
||||
from .notion_indexer import index_notion_pages
|
||||
from .obsidian_indexer import index_obsidian_vault
|
||||
from .slack_indexer import index_slack_messages
|
||||
from .webcrawler_indexer import index_crawled_urls
|
||||
|
||||
|
|
@ -68,6 +69,7 @@ __all__ = [ # noqa: RUF022
|
|||
"index_linear_issues",
|
||||
# Documentation and knowledge management
|
||||
"index_notion_pages",
|
||||
"index_obsidian_vault",
|
||||
"index_crawled_urls",
|
||||
# Communication platforms
|
||||
"index_slack_messages",
|
||||
|
|
|
|||
|
|
@ -0,0 +1,516 @@
|
|||
"""
|
||||
Obsidian connector indexer.
|
||||
|
||||
Indexes markdown notes from a local Obsidian vault.
|
||||
This connector is only available in self-hosted mode.
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
from datetime import UTC, datetime
|
||||
from pathlib import Path
|
||||
|
||||
import yaml
|
||||
from sqlalchemy.exc import SQLAlchemyError
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.config import config
|
||||
from app.db import Document, DocumentType, SearchSourceConnectorType
|
||||
from app.services.llm_service import get_user_long_context_llm
|
||||
from app.services.task_logging_service import TaskLoggingService
|
||||
from app.utils.document_converters import (
|
||||
create_document_chunks,
|
||||
generate_content_hash,
|
||||
generate_document_summary,
|
||||
generate_unique_identifier_hash,
|
||||
)
|
||||
|
||||
from .base import (
|
||||
build_document_metadata_string,
|
||||
check_document_by_unique_identifier,
|
||||
get_connector_by_id,
|
||||
get_current_timestamp,
|
||||
logger,
|
||||
update_connector_last_indexed,
|
||||
)
|
||||
|
||||
|
||||
def parse_frontmatter(content: str) -> tuple[dict | None, str]:
|
||||
"""
|
||||
Parse YAML frontmatter from markdown content.
|
||||
|
||||
Args:
|
||||
content: The full markdown content
|
||||
|
||||
Returns:
|
||||
Tuple of (frontmatter dict or None, content without frontmatter)
|
||||
"""
|
||||
if not content.startswith("---"):
|
||||
return None, content
|
||||
|
||||
# Find the closing ---
|
||||
end_match = re.search(r"\n---\n", content[3:])
|
||||
if not end_match:
|
||||
return None, content
|
||||
|
||||
frontmatter_str = content[3 : end_match.start() + 3]
|
||||
remaining_content = content[end_match.end() + 3 :]
|
||||
|
||||
try:
|
||||
frontmatter = yaml.safe_load(frontmatter_str)
|
||||
return frontmatter, remaining_content.strip()
|
||||
except yaml.YAMLError:
|
||||
return None, content
|
||||
|
||||
|
||||
def extract_wiki_links(content: str) -> list[str]:
|
||||
"""
|
||||
Extract [[wiki-style links]] from content.
|
||||
|
||||
Args:
|
||||
content: Markdown content
|
||||
|
||||
Returns:
|
||||
List of linked note names
|
||||
"""
|
||||
# Match [[link]] or [[link|alias]]
|
||||
pattern = r"\[\[([^\]|]+)(?:\|[^\]]+)?\]\]"
|
||||
matches = re.findall(pattern, content)
|
||||
return list(set(matches))
|
||||
|
||||
|
||||
def extract_tags(content: str) -> list[str]:
|
||||
"""
|
||||
Extract #tags from content (both inline and frontmatter).
|
||||
|
||||
Args:
|
||||
content: Markdown content
|
||||
|
||||
Returns:
|
||||
List of tags (without # prefix)
|
||||
"""
|
||||
# Match #tag but not ## headers
|
||||
pattern = r"(?<!\S)#([a-zA-Z][a-zA-Z0-9_/-]*)"
|
||||
matches = re.findall(pattern, content)
|
||||
return list(set(matches))
|
||||
|
||||
|
||||
def scan_vault(
|
||||
vault_path: str,
|
||||
exclude_folders: list[str] | None = None,
|
||||
) -> list[dict]:
|
||||
"""
|
||||
Scan an Obsidian vault for markdown files.
|
||||
|
||||
Args:
|
||||
vault_path: Path to the Obsidian vault
|
||||
exclude_folders: List of folder names to exclude
|
||||
|
||||
Returns:
|
||||
List of file info dicts with path, name, modified time
|
||||
"""
|
||||
if exclude_folders is None:
|
||||
exclude_folders = [".trash", ".obsidian", "templates"]
|
||||
|
||||
vault = Path(vault_path)
|
||||
if not vault.exists():
|
||||
raise ValueError(f"Vault path does not exist: {vault_path}")
|
||||
|
||||
files = []
|
||||
for md_file in vault.rglob("*.md"):
|
||||
# Check if file is in an excluded folder
|
||||
relative_path = md_file.relative_to(vault)
|
||||
parts = relative_path.parts
|
||||
|
||||
if any(excluded in parts for excluded in exclude_folders):
|
||||
continue
|
||||
|
||||
try:
|
||||
stat = md_file.stat()
|
||||
files.append(
|
||||
{
|
||||
"path": str(md_file),
|
||||
"relative_path": str(relative_path),
|
||||
"name": md_file.stem,
|
||||
"modified_at": datetime.fromtimestamp(stat.st_mtime, tz=UTC),
|
||||
"created_at": datetime.fromtimestamp(stat.st_ctime, tz=UTC),
|
||||
"size": stat.st_size,
|
||||
}
|
||||
)
|
||||
except OSError as e:
|
||||
logger.warning(f"Could not stat file {md_file}: {e}")
|
||||
|
||||
return files
|
||||
|
||||
|
||||
async def index_obsidian_vault(
|
||||
session: AsyncSession,
|
||||
connector_id: int,
|
||||
search_space_id: int,
|
||||
user_id: str,
|
||||
start_date: str | None = None,
|
||||
end_date: str | None = None,
|
||||
update_last_indexed: bool = True,
|
||||
) -> tuple[int, str | None]:
|
||||
"""
|
||||
Index notes from a local Obsidian vault.
|
||||
|
||||
This indexer is only available in self-hosted mode as it requires
|
||||
direct file system access to the user's Obsidian vault.
|
||||
|
||||
Args:
|
||||
session: Database session
|
||||
connector_id: ID of the Obsidian connector
|
||||
search_space_id: ID of the search space to store documents in
|
||||
user_id: ID of the user
|
||||
start_date: Start date for filtering (YYYY-MM-DD format) - optional
|
||||
end_date: End date for filtering (YYYY-MM-DD format) - optional
|
||||
update_last_indexed: Whether to update the last_indexed_at timestamp
|
||||
|
||||
Returns:
|
||||
Tuple containing (number of documents indexed, error message or None)
|
||||
"""
|
||||
task_logger = TaskLoggingService(session, search_space_id)
|
||||
|
||||
# Check if self-hosted mode
|
||||
if not config.is_self_hosted():
|
||||
return 0, "Obsidian connector is only available in self-hosted mode"
|
||||
|
||||
# Log task start
|
||||
log_entry = await task_logger.log_task_start(
|
||||
task_name="obsidian_vault_indexing",
|
||||
source="connector_indexing_task",
|
||||
message=f"Starting Obsidian vault indexing for connector {connector_id}",
|
||||
metadata={
|
||||
"connector_id": connector_id,
|
||||
"user_id": str(user_id),
|
||||
"start_date": start_date,
|
||||
"end_date": end_date,
|
||||
},
|
||||
)
|
||||
|
||||
try:
|
||||
# Get the connector
|
||||
await task_logger.log_task_progress(
|
||||
log_entry,
|
||||
f"Retrieving Obsidian connector {connector_id} from database",
|
||||
{"stage": "connector_retrieval"},
|
||||
)
|
||||
|
||||
connector = await get_connector_by_id(
|
||||
session, connector_id, SearchSourceConnectorType.OBSIDIAN_CONNECTOR
|
||||
)
|
||||
|
||||
if not connector:
|
||||
await task_logger.log_task_failure(
|
||||
log_entry,
|
||||
f"Connector with ID {connector_id} not found or is not an Obsidian connector",
|
||||
"Connector not found",
|
||||
{"error_type": "ConnectorNotFound"},
|
||||
)
|
||||
return (
|
||||
0,
|
||||
f"Connector with ID {connector_id} not found or is not an Obsidian connector",
|
||||
)
|
||||
|
||||
# Get vault path from connector config
|
||||
vault_path = connector.config.get("vault_path")
|
||||
if not vault_path:
|
||||
await task_logger.log_task_failure(
|
||||
log_entry,
|
||||
"Vault path not configured for this connector",
|
||||
"Missing vault path",
|
||||
{"error_type": "MissingVaultPath"},
|
||||
)
|
||||
return 0, "Vault path not configured for this connector"
|
||||
|
||||
# Validate vault path exists
|
||||
if not os.path.exists(vault_path):
|
||||
await task_logger.log_task_failure(
|
||||
log_entry,
|
||||
f"Vault path does not exist: {vault_path}",
|
||||
"Vault path not found",
|
||||
{"error_type": "VaultNotFound", "vault_path": vault_path},
|
||||
)
|
||||
return 0, f"Vault path does not exist: {vault_path}"
|
||||
|
||||
# Get configuration options
|
||||
exclude_folders = connector.config.get(
|
||||
"exclude_folders", [".trash", ".obsidian", "templates"]
|
||||
)
|
||||
vault_name = connector.config.get("vault_name") or os.path.basename(vault_path)
|
||||
|
||||
await task_logger.log_task_progress(
|
||||
log_entry,
|
||||
f"Scanning Obsidian vault: {vault_name}",
|
||||
{"stage": "vault_scan", "vault_path": vault_path},
|
||||
)
|
||||
|
||||
# Scan vault for markdown files
|
||||
try:
|
||||
files = scan_vault(vault_path, exclude_folders)
|
||||
except Exception as e:
|
||||
await task_logger.log_task_failure(
|
||||
log_entry,
|
||||
f"Failed to scan vault: {e}",
|
||||
"Vault scan error",
|
||||
{"error_type": "VaultScanError"},
|
||||
)
|
||||
return 0, f"Failed to scan vault: {e}"
|
||||
|
||||
logger.info(f"Found {len(files)} markdown files in vault")
|
||||
|
||||
await task_logger.log_task_progress(
|
||||
log_entry,
|
||||
f"Found {len(files)} markdown files to process",
|
||||
{"stage": "files_discovered", "file_count": len(files)},
|
||||
)
|
||||
|
||||
# Filter by date if provided (handle "undefined" string from frontend)
|
||||
# Also handle inverted dates (start > end) by skipping filtering
|
||||
start_dt = None
|
||||
end_dt = None
|
||||
|
||||
if start_date and start_date != "undefined":
|
||||
start_dt = datetime.strptime(start_date, "%Y-%m-%d").replace(tzinfo=UTC)
|
||||
|
||||
if end_date and end_date != "undefined":
|
||||
# Make end_date inclusive (end of day)
|
||||
end_dt = datetime.strptime(end_date, "%Y-%m-%d").replace(tzinfo=UTC)
|
||||
end_dt = end_dt.replace(hour=23, minute=59, second=59)
|
||||
|
||||
# Only apply date filtering if dates are valid and in correct order
|
||||
if start_dt and end_dt and start_dt > end_dt:
|
||||
logger.warning(
|
||||
f"start_date ({start_date}) is after end_date ({end_date}), skipping date filter"
|
||||
)
|
||||
else:
|
||||
if start_dt:
|
||||
files = [f for f in files if f["modified_at"] >= start_dt]
|
||||
logger.info(
|
||||
f"After start_date filter ({start_date}): {len(files)} files"
|
||||
)
|
||||
if end_dt:
|
||||
files = [f for f in files if f["modified_at"] <= end_dt]
|
||||
logger.info(f"After end_date filter ({end_date}): {len(files)} files")
|
||||
|
||||
logger.info(f"Processing {len(files)} files after date filtering")
|
||||
|
||||
# Get LLM for summarization
|
||||
long_context_llm = await get_user_long_context_llm(
|
||||
session, user_id, search_space_id
|
||||
)
|
||||
|
||||
indexed_count = 0
|
||||
skipped_count = 0
|
||||
|
||||
for file_info in files:
|
||||
try:
|
||||
file_path = file_info["path"]
|
||||
relative_path = file_info["relative_path"]
|
||||
|
||||
# Read file content
|
||||
try:
|
||||
with open(file_path, encoding="utf-8") as f:
|
||||
content = f.read()
|
||||
except UnicodeDecodeError:
|
||||
logger.warning(f"Could not decode file {file_path}, skipping")
|
||||
skipped_count += 1
|
||||
continue
|
||||
|
||||
if not content.strip():
|
||||
logger.debug(f"Empty file {file_path}, skipping")
|
||||
skipped_count += 1
|
||||
continue
|
||||
|
||||
# Parse frontmatter and extract metadata
|
||||
frontmatter, body_content = parse_frontmatter(content)
|
||||
wiki_links = extract_wiki_links(content)
|
||||
tags = extract_tags(content)
|
||||
|
||||
# Get title from frontmatter or filename
|
||||
title = file_info["name"]
|
||||
if frontmatter:
|
||||
title = frontmatter.get("title", title)
|
||||
# Also extract tags from frontmatter
|
||||
fm_tags = frontmatter.get("tags", [])
|
||||
if isinstance(fm_tags, list):
|
||||
tags = list({*tags, *fm_tags})
|
||||
elif isinstance(fm_tags, str):
|
||||
tags = list({*tags, fm_tags})
|
||||
|
||||
# Generate unique identifier using vault name and relative path
|
||||
unique_identifier = f"{vault_name}:{relative_path}"
|
||||
unique_identifier_hash = generate_unique_identifier_hash(
|
||||
DocumentType.OBSIDIAN_CONNECTOR,
|
||||
unique_identifier,
|
||||
search_space_id,
|
||||
)
|
||||
|
||||
# Check for existing document
|
||||
existing_document = await check_document_by_unique_identifier(
|
||||
session, unique_identifier_hash
|
||||
)
|
||||
|
||||
# Generate content hash
|
||||
content_hash = generate_content_hash(content, search_space_id)
|
||||
|
||||
# Build metadata
|
||||
document_metadata = {
|
||||
"vault_name": vault_name,
|
||||
"file_path": relative_path,
|
||||
"tags": tags,
|
||||
"outgoing_links": wiki_links,
|
||||
"frontmatter": frontmatter,
|
||||
"modified_at": file_info["modified_at"].isoformat(),
|
||||
"created_at": file_info["created_at"].isoformat(),
|
||||
"word_count": len(body_content.split()),
|
||||
}
|
||||
|
||||
# Build document content with metadata
|
||||
metadata_sections = [
|
||||
(
|
||||
"METADATA",
|
||||
[
|
||||
f"Title: {title}",
|
||||
f"Vault: {vault_name}",
|
||||
f"Path: {relative_path}",
|
||||
f"Tags: {', '.join(tags) if tags else 'None'}",
|
||||
f"Links to: {', '.join(wiki_links) if wiki_links else 'None'}",
|
||||
],
|
||||
),
|
||||
("CONTENT", [body_content]),
|
||||
]
|
||||
document_string = build_document_metadata_string(metadata_sections)
|
||||
|
||||
if existing_document:
|
||||
# Check if content has changed
|
||||
if existing_document.content_hash == content_hash:
|
||||
logger.debug(f"Note {title} unchanged, skipping")
|
||||
skipped_count += 1
|
||||
continue
|
||||
|
||||
# Update existing document
|
||||
logger.info(f"Updating note: {title}")
|
||||
|
||||
# Generate new summary if content changed
|
||||
if long_context_llm:
|
||||
new_summary, _ = await generate_document_summary(
|
||||
document_string,
|
||||
long_context_llm,
|
||||
document_metadata,
|
||||
)
|
||||
# Store summary in metadata
|
||||
document_metadata["summary"] = new_summary
|
||||
|
||||
# Add URL and connector_id to metadata
|
||||
document_metadata["url"] = (
|
||||
f"obsidian://{vault_name}/{relative_path}"
|
||||
)
|
||||
document_metadata["connector_id"] = connector_id
|
||||
|
||||
existing_document.content = document_string
|
||||
existing_document.content_hash = content_hash
|
||||
existing_document.document_metadata = document_metadata
|
||||
existing_document.updated_at = get_current_timestamp()
|
||||
|
||||
# Update embedding
|
||||
embedding = config.embedding_model_instance.embed(document_string)
|
||||
existing_document.embedding = embedding
|
||||
|
||||
# Update chunks - delete old and create new
|
||||
existing_document.chunks.clear()
|
||||
new_chunks = await create_document_chunks(document_string)
|
||||
existing_document.chunks = new_chunks
|
||||
|
||||
indexed_count += 1
|
||||
|
||||
else:
|
||||
# Create new document
|
||||
logger.info(f"Indexing new note: {title}")
|
||||
|
||||
# Generate summary
|
||||
summary_content = ""
|
||||
if long_context_llm:
|
||||
summary_content, _ = await generate_document_summary(
|
||||
document_string,
|
||||
long_context_llm,
|
||||
document_metadata,
|
||||
)
|
||||
|
||||
# Generate embedding
|
||||
embedding = config.embedding_model_instance.embed(document_string)
|
||||
|
||||
# Add URL and summary to metadata
|
||||
document_metadata["url"] = (
|
||||
f"obsidian://{vault_name}/{relative_path}"
|
||||
)
|
||||
document_metadata["summary"] = summary_content
|
||||
document_metadata["connector_id"] = connector_id
|
||||
|
||||
# Create chunks
|
||||
chunks = await create_document_chunks(document_string)
|
||||
|
||||
# Create document
|
||||
new_document = Document(
|
||||
search_space_id=search_space_id,
|
||||
title=title,
|
||||
document_type=DocumentType.OBSIDIAN_CONNECTOR,
|
||||
content=document_string,
|
||||
content_hash=content_hash,
|
||||
unique_identifier_hash=unique_identifier_hash,
|
||||
document_metadata=document_metadata,
|
||||
embedding=embedding,
|
||||
chunks=chunks,
|
||||
updated_at=get_current_timestamp(),
|
||||
)
|
||||
|
||||
session.add(new_document)
|
||||
|
||||
indexed_count += 1
|
||||
|
||||
except Exception as e:
|
||||
logger.exception(
|
||||
f"Error processing file {file_info.get('path', 'unknown')}: {e}"
|
||||
)
|
||||
skipped_count += 1
|
||||
continue
|
||||
|
||||
# Update connector's last indexed timestamp
|
||||
await update_connector_last_indexed(session, connector, update_last_indexed)
|
||||
|
||||
# Commit all changes
|
||||
await session.commit()
|
||||
|
||||
await task_logger.log_task_success(
|
||||
log_entry,
|
||||
f"Successfully indexed {indexed_count} Obsidian notes (skipped {skipped_count})",
|
||||
{
|
||||
"indexed_count": indexed_count,
|
||||
"skipped_count": skipped_count,
|
||||
"total_files": len(files),
|
||||
},
|
||||
)
|
||||
|
||||
return indexed_count, None
|
||||
|
||||
except SQLAlchemyError as e:
|
||||
logger.exception(f"Database error during Obsidian indexing: {e}")
|
||||
await session.rollback()
|
||||
await task_logger.log_task_failure(
|
||||
log_entry,
|
||||
f"Database error during Obsidian indexing: {e}",
|
||||
"Database error",
|
||||
{"error_type": "SQLAlchemyError"},
|
||||
)
|
||||
return 0, f"Database error: {e}"
|
||||
|
||||
except Exception as e:
|
||||
logger.exception(f"Error during Obsidian indexing: {e}")
|
||||
await task_logger.log_task_failure(
|
||||
log_entry,
|
||||
f"Error during Obsidian indexing: {e}",
|
||||
"Unexpected error",
|
||||
{"error_type": type(e).__name__},
|
||||
)
|
||||
return 0, str(e)
|
||||
|
|
@ -46,12 +46,9 @@ dependencies = [
|
|||
"boto3>=1.35.0",
|
||||
"langchain-community>=0.3.31",
|
||||
"langchain-unstructured>=1.0.0",
|
||||
"langchain>=1.2.0",
|
||||
"litellm>=1.80.10",
|
||||
"langchain-litellm>=0.3.5",
|
||||
"langgraph>=1.0.5",
|
||||
"fake-useragent>=2.2.0",
|
||||
"deepagents>=0.3.0",
|
||||
"trafilatura>=2.0.0",
|
||||
"fastapi-users[oauth,sqlalchemy]>=15.0.3",
|
||||
"chonkie[all]>=1.5.0",
|
||||
|
|
@ -62,6 +59,9 @@ dependencies = [
|
|||
"sse-starlette>=3.1.1,<3.1.2",
|
||||
"gitingest>=0.3.1",
|
||||
"composio>=0.10.9",
|
||||
"deepagents>=0.3.8",
|
||||
"langchain>=1.2.6",
|
||||
"langgraph>=1.0.5",
|
||||
]
|
||||
|
||||
[dependency-groups]
|
||||
|
|
|
|||
58
surfsense_backend/uv.lock
generated
58
surfsense_backend/uv.lock
generated
|
|
@ -195,7 +195,7 @@ wheels = [
|
|||
|
||||
[[package]]
|
||||
name = "anthropic"
|
||||
version = "0.75.0"
|
||||
version = "0.76.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "anyio" },
|
||||
|
|
@ -207,9 +207,9 @@ dependencies = [
|
|||
{ name = "sniffio" },
|
||||
{ name = "typing-extensions" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/04/1f/08e95f4b7e2d35205ae5dcbb4ae97e7d477fc521c275c02609e2931ece2d/anthropic-0.75.0.tar.gz", hash = "sha256:e8607422f4ab616db2ea5baacc215dd5f028da99ce2f022e33c7c535b29f3dfb", size = 439565 }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/6e/be/d11abafaa15d6304826438170f7574d750218f49a106c54424a40cef4494/anthropic-0.76.0.tar.gz", hash = "sha256:e0cae6a368986d5cf6df743dfbb1b9519e6a9eee9c6c942ad8121c0b34416ffe", size = 495483 }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/60/1c/1cd02b7ae64302a6e06724bf80a96401d5313708651d277b1458504a1730/anthropic-0.75.0-py3-none-any.whl", hash = "sha256:ea8317271b6c15d80225a9f3c670152746e88805a7a61e14d4a374577164965b", size = 388164 },
|
||||
{ url = "https://files.pythonhosted.org/packages/e5/70/7b0fd9c1a738f59d3babe2b4212031c34ab7d0fda4ffef15b58a55c5bcea/anthropic-0.76.0-py3-none-any.whl", hash = "sha256:81efa3113901192af2f0fe977d3ec73fdadb1e691586306c4256cd6d5ccc331c", size = 390309 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -1231,17 +1231,18 @@ wheels = [
|
|||
|
||||
[[package]]
|
||||
name = "deepagents"
|
||||
version = "0.3.0"
|
||||
version = "0.3.8"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "langchain" },
|
||||
{ name = "langchain-anthropic" },
|
||||
{ name = "langchain-core" },
|
||||
{ name = "langchain-google-genai" },
|
||||
{ name = "wcmatch" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/cc/c7/d3c2840bd0e66b6cd5948aa69625e129328ad261308e18fcb9a9420709da/deepagents-0.3.0.tar.gz", hash = "sha256:3dd4d2ed53efb1ef78aeb1020a5696c0ec7e58e627b305a6665d33fe6fbdedff", size = 51387 }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/47/69/d8dd80dd5c0c81393cc32623dd51e642c8607ab798276506b3b3e89b1f20/deepagents-0.3.8.tar.gz", hash = "sha256:4b8252f8deaad449ce39426cc2233a597ee079b9b690647a26d128c16d6c6eb8", size = 73956 }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/22/e9/60bab7f37ff38bf982ea578e457ed1878ded613a3425462bcd07b00487e9/deepagents-0.3.0-py3-none-any.whl", hash = "sha256:9e23532d8d535dc2b0b4e0834453a1223a6a8f81b77947c0faf54537d05ce89a", size = 54065 },
|
||||
{ url = "https://files.pythonhosted.org/packages/a6/6a/35968909bd3184eafee97326bcfd16c99bf9b0e03aadb3327eaf7229ea11/deepagents-0.3.8-py3-none-any.whl", hash = "sha256:7c76205dc014173d795402045b51505517954c7c4a508175f8e4a529f51928cc", size = 79161 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -1992,9 +1993,9 @@ dependencies = [
|
|||
{ name = "starlette" },
|
||||
{ name = "tiktoken" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/d6/fe/a915f0c32a3d7920206a677f73c185b3eadf4ec151fb05aedd52e64713f7/gitingest-0.3.1.tar.gz", hash = "sha256:4587cab873d4e08bdb16d612bb153c23e0ce59771a1d57a438239c5e39f05ebf", size = 70681, upload-time = "2025-07-31T13:56:19.845Z" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/d6/fe/a915f0c32a3d7920206a677f73c185b3eadf4ec151fb05aedd52e64713f7/gitingest-0.3.1.tar.gz", hash = "sha256:4587cab873d4e08bdb16d612bb153c23e0ce59771a1d57a438239c5e39f05ebf", size = 70681 }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/00/15/f200ab2e73287e67d1dce6fbacf421552ae9fbafdc5f0cc8dd0d2fe4fc47/gitingest-0.3.1-py3-none-any.whl", hash = "sha256:8143a5e6a7140ede9f680e13d3931ac07c82ac9bd8bab9ad1fba017c8c1e8666", size = 68343, upload-time = "2025-07-31T13:56:17.729Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/00/15/f200ab2e73287e67d1dce6fbacf421552ae9fbafdc5f0cc8dd0d2fe4fc47/gitingest-0.3.1-py3-none-any.whl", hash = "sha256:8143a5e6a7140ede9f680e13d3931ac07c82ac9bd8bab9ad1fba017c8c1e8666", size = 68343 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -2916,30 +2917,30 @@ wheels = [
|
|||
|
||||
[[package]]
|
||||
name = "langchain"
|
||||
version = "1.2.0"
|
||||
version = "1.2.6"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "langchain-core" },
|
||||
{ name = "langgraph" },
|
||||
{ name = "pydantic" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/b1/12/3a74c22abdfddd877dfc2ee666d516f9132877fcd25eb4dd694835c59c79/langchain-1.2.0.tar.gz", hash = "sha256:a087d1e2b2969819e29a91a6d5f98302aafe31bd49ba377ecee3bf5a5dcfe14a", size = 536126 }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/f5/bc/d8f506a525baadee99a65c6cc28c1c35c9eaf1cb2009f048e9861d81a600/langchain-1.2.6.tar.gz", hash = "sha256:7d46cbf719d860a16f6fc182d5d3de17453dda187f3d43e9c40ac352a5094fdd", size = 553127 }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/23/00/4e3fa0d90f5a5c376ccb8ca983d0f0f7287783dfac48702e18f01d24673b/langchain-1.2.0-py3-none-any.whl", hash = "sha256:82f0d17aa4fbb11560b30e1e7d4aeb75e3ad71ce09b85c90ab208b181a24ffac", size = 102828 },
|
||||
{ url = "https://files.pythonhosted.org/packages/3f/28/d5dc4cb06ccb29d62a590d446072964766555e85863f5044c6e644c07d0d/langchain-1.2.6-py3-none-any.whl", hash = "sha256:a9a6c39f03c09b6eb0f1b47e267ad2a2fd04e124dfaa9753bd6c11d2fe7d944e", size = 108458 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "langchain-anthropic"
|
||||
version = "1.3.0"
|
||||
version = "1.3.1"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "anthropic" },
|
||||
{ name = "langchain-core" },
|
||||
{ name = "pydantic" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/de/50/cc3b3e0410d86de457d7a100dde763fc1c33c4ce884e883659aa4cf95538/langchain_anthropic-1.3.0.tar.gz", hash = "sha256:497a937ee0310c588196bff37f39f02d43d87bff3a12d16278bdbc3bd0e9a80b", size = 707207 }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/0d/b6/ac5ee84e15bf79844c9c791f99a614c7ec7e1a63c2947e55977be01a81b4/langchain_anthropic-1.3.1.tar.gz", hash = "sha256:4f3d7a4a7729ab1aeaf62d32c87d4d227c1b5421668ca9e3734562b383470b07", size = 708940 }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/29/ca/0725bc347a9c226da9d76f85bf7d03115caec7dbc87876af68579c4ab24e/langchain_anthropic-1.3.0-py3-none-any.whl", hash = "sha256:3823560e1df15d6082636baa04f87cb59052ba70aada0eba381c4679b1ce0eba", size = 45724 },
|
||||
{ url = "https://files.pythonhosted.org/packages/9a/4f/7a5b32764addf4b757545b89899b9d76688176f19e4ee89868e3b8bbfd0f/langchain_anthropic-1.3.1-py3-none-any.whl", hash = "sha256:1fc28cf8037c30597ee6172fc2ff9e345efe8149a8c2a39897b1eebba2948322", size = 46328 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -2967,7 +2968,7 @@ wheels = [
|
|||
|
||||
[[package]]
|
||||
name = "langchain-core"
|
||||
version = "1.2.1"
|
||||
version = "1.2.7"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "jsonpatch" },
|
||||
|
|
@ -2979,9 +2980,24 @@ dependencies = [
|
|||
{ name = "typing-extensions" },
|
||||
{ name = "uuid-utils" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/f5/a0/2177f4ef4dfbea8edeba377b7b4889d177b8356ce186640e4651b240fd4d/langchain_core-1.2.1.tar.gz", hash = "sha256:131e6ad105b47ec2adc4d4d973f569276688f48cd890ba44603d48e76d9993ce", size = 802986 }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/a2/0e/664d8d81b3493e09cbab72448d2f9d693d1fa5aa2bcc488602203a9b6da0/langchain_core-1.2.7.tar.gz", hash = "sha256:e1460639f96c352b4a41c375f25aeb8d16ffc1769499fb1c20503aad59305ced", size = 837039 }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/cc/95/98c47dbb4b6098934ff70e0f52efef3a85505dbcccc9eb63587e21fde4c9/langchain_core-1.2.1-py3-none-any.whl", hash = "sha256:2f63859f85dc3d95f768e35fed605702e3ff5aa3e92c7b253103119613e79768", size = 475972 },
|
||||
{ url = "https://files.pythonhosted.org/packages/6e/6f/34a9fba14d191a67f7e2ee3dbce3e9b86d2fa7310e2c7f2c713583481bd2/langchain_core-1.2.7-py3-none-any.whl", hash = "sha256:452f4fef7a3d883357b22600788d37e3d8854ef29da345b7ac7099f33c31828b", size = 490232 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "langchain-google-genai"
|
||||
version = "4.2.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "filetype" },
|
||||
{ name = "google-genai" },
|
||||
{ name = "langchain-core" },
|
||||
{ name = "pydantic" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/d8/0b/eae2305e207574dc633983a8a82a745e0ede1bce1f3a9daff24d2341fadc/langchain_google_genai-4.2.0.tar.gz", hash = "sha256:9a8d9bfc35354983ed29079cefff53c3e7c9c2a44b6ba75cc8f13a0cf8b55c33", size = 277361 }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/22/51/39942c0083139652494bb354dddf0ed397703a4882302f7b48aeca531c96/langchain_google_genai-4.2.0-py3-none-any.whl", hash = "sha256:856041aaafceff65a4ef0d5acf5731f2db95229ff041132af011aec51e8279d9", size = 66452 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -4516,9 +4532,9 @@ wheels = [
|
|||
name = "pathspec"
|
||||
version = "1.0.3"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/4c/b2/bb8e495d5262bfec41ab5cb18f522f1012933347fb5d9e62452d446baca2/pathspec-1.0.3.tar.gz", hash = "sha256:bac5cf97ae2c2876e2d25ebb15078eb04d76e4b98921ee31c6f85ade8b59444d", size = 130841, upload-time = "2026-01-09T15:46:46.009Z" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/4c/b2/bb8e495d5262bfec41ab5cb18f522f1012933347fb5d9e62452d446baca2/pathspec-1.0.3.tar.gz", hash = "sha256:bac5cf97ae2c2876e2d25ebb15078eb04d76e4b98921ee31c6f85ade8b59444d", size = 130841 }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/32/2b/121e912bd60eebd623f873fd090de0e84f322972ab25a7f9044c056804ed/pathspec-1.0.3-py3-none-any.whl", hash = "sha256:e80767021c1cc524aa3fb14bedda9c34406591343cc42797b386ce7b9354fb6c", size = 55021, upload-time = "2026-01-09T15:46:44.652Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/32/2b/121e912bd60eebd623f873fd090de0e84f322972ab25a7f9044c056804ed/pathspec-1.0.3-py3-none-any.whl", hash = "sha256:e80767021c1cc524aa3fb14bedda9c34406591343cc42797b386ce7b9354fb6c", size = 55021 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -6606,7 +6622,7 @@ requires-dist = [
|
|||
{ name = "chonkie", extras = ["all"], specifier = ">=1.5.0" },
|
||||
{ name = "composio", specifier = ">=0.10.9" },
|
||||
{ name = "datasets", specifier = ">=2.21.0" },
|
||||
{ name = "deepagents", specifier = ">=0.3.0" },
|
||||
{ name = "deepagents", specifier = ">=0.3.8" },
|
||||
{ name = "discord-py", specifier = ">=2.5.2" },
|
||||
{ name = "docling", specifier = ">=2.15.0" },
|
||||
{ name = "elasticsearch", specifier = ">=9.1.1" },
|
||||
|
|
@ -6622,7 +6638,7 @@ requires-dist = [
|
|||
{ name = "google-api-python-client", specifier = ">=2.156.0" },
|
||||
{ name = "google-auth-oauthlib", specifier = ">=1.2.1" },
|
||||
{ name = "kokoro", specifier = ">=0.9.4" },
|
||||
{ name = "langchain", specifier = ">=1.2.0" },
|
||||
{ name = "langchain", specifier = ">=1.2.6" },
|
||||
{ name = "langchain-community", specifier = ">=0.3.31" },
|
||||
{ name = "langchain-litellm", specifier = ">=0.3.5" },
|
||||
{ name = "langchain-unstructured", specifier = ">=1.0.0" },
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue