mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-15 18:25:18 +02:00
Merge remote-tracking branch 'upstream/main' into feature/blocknote-editor
This commit is contained in:
commit
b98c312fb1
81 changed files with 8976 additions and 2387 deletions
19
README.md
19
README.md
|
|
@ -38,19 +38,24 @@ https://github.com/user-attachments/assets/a0a16566-6967-4374-ac51-9b3e07fbecd7
|
|||
## Key Features
|
||||
|
||||
### 💡 **Idea**:
|
||||
Have your own highly customizable private NotebookLM and Perplexity integrated with external sources.
|
||||
- Have your own highly customizable private NotebookLM and Perplexity integrated with external sources.
|
||||
### 📁 **Multiple File Format Uploading Support**
|
||||
Save content from your own personal files *(Documents, images, videos and supports **50+ file extensions**)* to your own personal knowledge base.
|
||||
- Save content from your own personal files *(Documents, images, videos and supports **50+ file extensions**)* to your own personal knowledge base.
|
||||
### 🔍 **Powerful Search**
|
||||
Quickly research or find anything in your saved content.
|
||||
- Quickly research or find anything in your saved content.
|
||||
### 💬 **Chat with your Saved Content**
|
||||
Interact in Natural Language and get cited answers.
|
||||
- Interact in Natural Language and get cited answers.
|
||||
### 📄 **Cited Answers**
|
||||
Get Cited answers just like Perplexity.
|
||||
- Get Cited answers just like Perplexity.
|
||||
### 🔔 **Privacy & Local LLM Support**
|
||||
Works Flawlessly with Ollama local LLMs.
|
||||
- Works Flawlessly with Ollama local LLMs.
|
||||
### 🏠 **Self Hostable**
|
||||
Open source and easy to deploy locally.
|
||||
- Open source and easy to deploy locally.
|
||||
### 👥 **Team Collaboration with RBAC**
|
||||
- Role-Based Access Control for Search Spaces
|
||||
- Invite team members with customizable roles (Owner, Admin, Editor, Viewer)
|
||||
- Granular permissions for documents, chats, connectors, and settings
|
||||
- Share knowledge bases securely within your organization
|
||||
### 🎙️ Podcasts
|
||||
- Blazingly fast podcast generation agent. (Creates a 3-minute podcast in under 20 seconds.)
|
||||
- Convert your chat conversations into engaging audio content
|
||||
|
|
|
|||
|
|
@ -39,25 +39,31 @@ https://github.com/user-attachments/assets/a0a16566-6967-4374-ac51-9b3e07fbecd7
|
|||
## 核心功能
|
||||
|
||||
### 💡 **理念**:
|
||||
拥有您自己的高度可定制的私有 NotebookLM 和 Perplexity,并与外部数据源集成。
|
||||
- 拥有您自己的高度可定制的私有 NotebookLM 和 Perplexity,并与外部数据源集成。
|
||||
|
||||
### 📁 **支持多种文件格式上传**
|
||||
将您个人文件中的内容(文档、图像、视频,支持 **50+ 种文件扩展名**)保存到您自己的个人知识库。
|
||||
- 将您个人文件中的内容(文档、图像、视频,支持 **50+ 种文件扩展名**)保存到您自己的个人知识库。
|
||||
|
||||
### 🔍 **强大的搜索功能**
|
||||
快速研究或查找已保存内容中的任何信息。
|
||||
- 快速研究或查找已保存内容中的任何信息。
|
||||
|
||||
### 💬 **与已保存内容对话**
|
||||
使用自然语言交互并获得引用答案。
|
||||
- 使用自然语言交互并获得引用答案。
|
||||
|
||||
### 📄 **引用答案**
|
||||
像 Perplexity 一样获得带引用的答案。
|
||||
- 像 Perplexity 一样获得带引用的答案。
|
||||
|
||||
### 🔔 **隐私保护与本地 LLM 支持**
|
||||
完美支持 Ollama 本地大语言模型。
|
||||
- 完美支持 Ollama 本地大语言模型。
|
||||
|
||||
### 🏠 **可自托管**
|
||||
开源且易于本地部署。
|
||||
- 开源且易于本地部署。
|
||||
|
||||
### 👥 **团队协作与 RBAC**
|
||||
- 搜索空间的基于角色的访问控制
|
||||
- 使用可自定义的角色(所有者、管理员、编辑者、查看者)邀请团队成员
|
||||
- 对文档、聊天、连接器和设置的细粒度权限控制
|
||||
- 在组织内安全共享知识库
|
||||
|
||||
### 🎙️ **播客功能**
|
||||
- 超快速播客生成代理(在 20 秒内创建 3 分钟播客)
|
||||
|
|
|
|||
|
|
@ -8,6 +8,8 @@ Create Date: 2025-11-13 23:20:12.912741
|
|||
|
||||
from collections.abc import Sequence
|
||||
|
||||
from sqlalchemy import text
|
||||
|
||||
from alembic import op
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
|
|
@ -17,6 +19,20 @@ branch_labels: str | Sequence[str] | None = None
|
|||
depends_on: str | Sequence[str] | None = None
|
||||
|
||||
|
||||
def constraint_exists(connection, table_name: str, constraint_name: str) -> bool:
    """Return True if *table_name* carries a constraint named *constraint_name*.

    Looks the constraint up in information_schema.table_constraints via a
    parameterized query on the supplied connection.
    """
    query = text(
        """
        SELECT 1 FROM information_schema.table_constraints
        WHERE table_name = :table_name AND constraint_name = :constraint_name
        """
    )
    params = {"table_name": table_name, "constraint_name": constraint_name}
    row = connection.execute(query, params).fetchone()
    return row is not None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
"""
|
||||
Remove foreign key constraints on LLM preference columns to allow global configs (negative IDs).
|
||||
|
|
@ -24,50 +40,55 @@ def upgrade() -> None:
|
|||
Global LLM configs use negative IDs and don't exist in the llm_configs table,
|
||||
so we need to remove the foreign key constraints that were preventing their use.
|
||||
"""
|
||||
# Drop the foreign key constraints
|
||||
op.drop_constraint(
|
||||
connection = op.get_bind()
|
||||
|
||||
# Drop the foreign key constraints if they exist
|
||||
constraints_to_drop = [
|
||||
"user_search_space_preferences_long_context_llm_id_fkey",
|
||||
"user_search_space_preferences",
|
||||
type_="foreignkey",
|
||||
)
|
||||
op.drop_constraint(
|
||||
"user_search_space_preferences_fast_llm_id_fkey",
|
||||
"user_search_space_preferences",
|
||||
type_="foreignkey",
|
||||
)
|
||||
op.drop_constraint(
|
||||
"user_search_space_preferences_strategic_llm_id_fkey",
|
||||
"user_search_space_preferences",
|
||||
type_="foreignkey",
|
||||
)
|
||||
]
|
||||
|
||||
for constraint_name in constraints_to_drop:
|
||||
if constraint_exists(
|
||||
connection, "user_search_space_preferences", constraint_name
|
||||
):
|
||||
op.drop_constraint(
|
||||
constraint_name,
|
||||
"user_search_space_preferences",
|
||||
type_="foreignkey",
|
||||
)
|
||||
else:
|
||||
print(f"Constraint '{constraint_name}' does not exist. Skipping.")
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
"""
|
||||
Re-add foreign key constraints (will fail if any negative IDs exist in the table).
|
||||
"""
|
||||
# Re-add the foreign key constraints
|
||||
op.create_foreign_key(
|
||||
"user_search_space_preferences_long_context_llm_id_fkey",
|
||||
"user_search_space_preferences",
|
||||
"llm_configs",
|
||||
["long_context_llm_id"],
|
||||
["id"],
|
||||
ondelete="SET NULL",
|
||||
)
|
||||
op.create_foreign_key(
|
||||
"user_search_space_preferences_fast_llm_id_fkey",
|
||||
"user_search_space_preferences",
|
||||
"llm_configs",
|
||||
["fast_llm_id"],
|
||||
["id"],
|
||||
ondelete="SET NULL",
|
||||
)
|
||||
op.create_foreign_key(
|
||||
"user_search_space_preferences_strategic_llm_id_fkey",
|
||||
"user_search_space_preferences",
|
||||
"llm_configs",
|
||||
["strategic_llm_id"],
|
||||
["id"],
|
||||
ondelete="SET NULL",
|
||||
)
|
||||
connection = op.get_bind()
|
||||
|
||||
# Re-add the foreign key constraints if they don't exist
|
||||
constraints_to_create = [
|
||||
(
|
||||
"user_search_space_preferences_long_context_llm_id_fkey",
|
||||
"long_context_llm_id",
|
||||
),
|
||||
("user_search_space_preferences_fast_llm_id_fkey", "fast_llm_id"),
|
||||
("user_search_space_preferences_strategic_llm_id_fkey", "strategic_llm_id"),
|
||||
]
|
||||
|
||||
for constraint_name, column_name in constraints_to_create:
|
||||
if not constraint_exists(
|
||||
connection, "user_search_space_preferences", constraint_name
|
||||
):
|
||||
op.create_foreign_key(
|
||||
constraint_name,
|
||||
"user_search_space_preferences",
|
||||
"llm_configs",
|
||||
[column_name],
|
||||
["id"],
|
||||
ondelete="SET NULL",
|
||||
)
|
||||
else:
|
||||
print(f"Constraint '{constraint_name}' already exists. Skipping.")
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@ Create Date: 2025-11-19 00:00:00.000000
|
|||
from collections.abc import Sequence
|
||||
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy import text
|
||||
|
||||
from alembic import op
|
||||
|
||||
|
|
@ -19,24 +20,55 @@ branch_labels: str | Sequence[str] | None = None
|
|||
depends_on: str | Sequence[str] | None = None
|
||||
|
||||
|
||||
def column_exists(connection, table_name: str, column_name: str) -> bool:
    """Return True if *table_name* already has a column named *column_name*.

    Queries information_schema.columns with bound parameters on the supplied
    connection.
    """
    query = text(
        """
        SELECT 1 FROM information_schema.columns
        WHERE table_name = :table_name AND column_name = :column_name
        """
    )
    params = {"table_name": table_name, "column_name": column_name}
    row = connection.execute(query, params).fetchone()
    return row is not None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
"""Add QnA configuration columns to searchspaces table."""
|
||||
connection = op.get_bind()
|
||||
|
||||
# Add citations_enabled boolean (default True)
|
||||
op.add_column(
|
||||
"searchspaces",
|
||||
sa.Column(
|
||||
"citations_enabled", sa.Boolean(), nullable=False, server_default="true"
|
||||
),
|
||||
)
|
||||
if not column_exists(connection, "searchspaces", "citations_enabled"):
|
||||
op.add_column(
|
||||
"searchspaces",
|
||||
sa.Column(
|
||||
"citations_enabled", sa.Boolean(), nullable=False, server_default="true"
|
||||
),
|
||||
)
|
||||
else:
|
||||
print("Column 'citations_enabled' already exists. Skipping.")
|
||||
|
||||
# Add custom instructions text field (nullable, defaults to empty)
|
||||
op.add_column(
|
||||
"searchspaces",
|
||||
sa.Column("qna_custom_instructions", sa.Text(), nullable=True),
|
||||
)
|
||||
if not column_exists(connection, "searchspaces", "qna_custom_instructions"):
|
||||
op.add_column(
|
||||
"searchspaces",
|
||||
sa.Column("qna_custom_instructions", sa.Text(), nullable=True),
|
||||
)
|
||||
else:
|
||||
print("Column 'qna_custom_instructions' already exists. Skipping.")
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
"""Remove QnA configuration columns from searchspaces table."""
|
||||
op.drop_column("searchspaces", "qna_custom_instructions")
|
||||
op.drop_column("searchspaces", "citations_enabled")
|
||||
connection = op.get_bind()
|
||||
|
||||
if column_exists(connection, "searchspaces", "qna_custom_instructions"):
|
||||
op.drop_column("searchspaces", "qna_custom_instructions")
|
||||
else:
|
||||
print("Column 'qna_custom_instructions' does not exist. Skipping.")
|
||||
|
||||
if column_exists(connection, "searchspaces", "citations_enabled"):
|
||||
op.drop_column("searchspaces", "citations_enabled")
|
||||
else:
|
||||
print("Column 'citations_enabled' does not exist. Skipping.")
|
||||
|
|
|
|||
|
|
@ -0,0 +1,59 @@
|
|||
"""Add Webcrawler connector enums
|
||||
|
||||
Revision ID: 38
|
||||
Revises: 37
|
||||
Create Date: 2025-11-17 17:00:00.000000
|
||||
|
||||
"""
|
||||
|
||||
from collections.abc import Sequence
|
||||
|
||||
from alembic import op
|
||||
|
||||
revision: str = "38"
|
||||
down_revision: str | None = "37"
|
||||
branch_labels: str | Sequence[str] | None = None
|
||||
depends_on: str | Sequence[str] | None = None
|
||||
|
||||
|
||||
def _add_enum_value_if_missing(type_name: str, value: str) -> None:
    """Append *value* to the PostgreSQL enum *type_name* unless it already exists.

    A DO block with an explicit pg_enum check is used so the migration is
    idempotent and safe to re-run.
    """
    op.execute(
        f"""
        DO $$
        BEGIN
            IF NOT EXISTS (
                SELECT 1 FROM pg_type t
                JOIN pg_enum e ON t.oid = e.enumtypid
                WHERE t.typname = '{type_name}' AND e.enumlabel = '{value}'
            ) THEN
                ALTER TYPE {type_name} ADD VALUE '{value}';
            END IF;
        END
        $$;
        """
    )


def upgrade() -> None:
    """Safely add 'WEBCRAWLER_CONNECTOR' to enum types if missing."""
    # Register the new connector type and its document type; both additions
    # are no-ops when the label is already present.
    _add_enum_value_if_missing("searchsourceconnectortype", "WEBCRAWLER_CONNECTOR")
    _add_enum_value_if_missing("documenttype", "CRAWLED_URL")
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Intentional no-op: PostgreSQL cannot drop individual enum values.

    Removing 'WEBCRAWLER_CONNECTOR' / 'CRAWLED_URL' would require rebuilding
    each enum type and rewriting every column that uses it. Leaving the extra
    labels in place is harmless, so the downgrade deliberately does nothing.
    """
|
||||
179
surfsense_backend/alembic/versions/39_add_rbac_tables.py
Normal file
179
surfsense_backend/alembic/versions/39_add_rbac_tables.py
Normal file
|
|
@ -0,0 +1,179 @@
|
|||
"""Add RBAC tables for search space access control
|
||||
|
||||
Revision ID: 39
|
||||
Revises: 38
|
||||
Create Date: 2025-11-27 00:00:00.000000
|
||||
|
||||
This migration adds:
|
||||
- Permission enum for granular access control
|
||||
- search_space_roles table for custom roles per search space
|
||||
- search_space_memberships table for user-searchspace-role relationships
|
||||
- search_space_invites table for invite links
|
||||
"""
|
||||
|
||||
from collections.abc import Sequence
|
||||
|
||||
from sqlalchemy import inspect
|
||||
|
||||
from alembic import op
|
||||
|
||||
revision: str = "39"
|
||||
down_revision: str | None = "38"
|
||||
branch_labels: str | Sequence[str] | None = None
|
||||
depends_on: str | Sequence[str] | None = None
|
||||
|
||||
|
||||
def _create_missing_indexes(inspector, table_name, index_specs) -> None:
    """Create each (index_name, columns) index on *table_name* unless it exists."""
    existing = {idx["name"] for idx in inspector.get_indexes(table_name)}
    for index_name, columns in index_specs:
        if index_name not in existing:
            op.create_index(index_name, table_name, columns)


def upgrade() -> None:
    """Upgrade schema - add RBAC tables for search space access control.

    Creates search_space_roles, search_space_invites and
    search_space_memberships (idempotently, via CREATE TABLE IF NOT EXISTS),
    then adds supporting indexes, skipping any that already exist.
    """
    # Create search_space_roles table
    op.execute(
        """
        CREATE TABLE IF NOT EXISTS search_space_roles (
            id SERIAL PRIMARY KEY,
            created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
            name VARCHAR(100) NOT NULL,
            description VARCHAR(500),
            permissions TEXT[] NOT NULL DEFAULT '{}',
            is_default BOOLEAN NOT NULL DEFAULT FALSE,
            is_system_role BOOLEAN NOT NULL DEFAULT FALSE,
            search_space_id INTEGER NOT NULL REFERENCES searchspaces(id) ON DELETE CASCADE,
            CONSTRAINT uq_searchspace_role_name UNIQUE (search_space_id, name)
        );
        """
    )

    # Create search_space_invites table (needs to be created before memberships due to FK)
    op.execute(
        """
        CREATE TABLE IF NOT EXISTS search_space_invites (
            id SERIAL PRIMARY KEY,
            created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
            invite_code VARCHAR(64) NOT NULL UNIQUE,
            search_space_id INTEGER NOT NULL REFERENCES searchspaces(id) ON DELETE CASCADE,
            role_id INTEGER REFERENCES search_space_roles(id) ON DELETE SET NULL,
            created_by_id UUID REFERENCES "user"(id) ON DELETE SET NULL,
            expires_at TIMESTAMPTZ,
            max_uses INTEGER,
            uses_count INTEGER NOT NULL DEFAULT 0,
            is_active BOOLEAN NOT NULL DEFAULT TRUE,
            name VARCHAR(100)
        );
        """
    )

    # Create search_space_memberships table
    op.execute(
        """
        CREATE TABLE IF NOT EXISTS search_space_memberships (
            id SERIAL PRIMARY KEY,
            created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
            user_id UUID NOT NULL REFERENCES "user"(id) ON DELETE CASCADE,
            search_space_id INTEGER NOT NULL REFERENCES searchspaces(id) ON DELETE CASCADE,
            role_id INTEGER REFERENCES search_space_roles(id) ON DELETE SET NULL,
            is_owner BOOLEAN NOT NULL DEFAULT FALSE,
            joined_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
            invited_by_invite_id INTEGER REFERENCES search_space_invites(id) ON DELETE SET NULL,
            CONSTRAINT uq_user_searchspace_membership UNIQUE (user_id, search_space_id)
        );
        """
    )

    # Inspect the live schema so index creation stays idempotent.
    conn = op.get_bind()
    inspector = inspect(conn)

    _create_missing_indexes(
        inspector,
        "search_space_roles",
        [
            ("ix_search_space_roles_id", ["id"]),
            ("ix_search_space_roles_created_at", ["created_at"]),
            ("ix_search_space_roles_name", ["name"]),
        ],
    )

    _create_missing_indexes(
        inspector,
        "search_space_memberships",
        [
            ("ix_search_space_memberships_id", ["id"]),
            ("ix_search_space_memberships_created_at", ["created_at"]),
            ("ix_search_space_memberships_user_id", ["user_id"]),
            ("ix_search_space_memberships_search_space_id", ["search_space_id"]),
        ],
    )

    _create_missing_indexes(
        inspector,
        "search_space_invites",
        [
            ("ix_search_space_invites_id", ["id"]),
            ("ix_search_space_invites_created_at", ["created_at"]),
            ("ix_search_space_invites_invite_code", ["invite_code"]),
        ],
    )
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Downgrade schema - remove RBAC tables.

    Mirrors upgrade(): index drops are guarded by existence checks so the
    downgrade also succeeds after a partially-applied upgrade (the upgrade
    uses IF NOT EXISTS / inspector checks, so any index may be absent).
    """
    conn = op.get_bind()
    inspector = inspect(conn)

    # (table, indexes) in the same drop order as the original migration.
    index_drops = [
        (
            "search_space_memberships",
            [
                "ix_search_space_memberships_search_space_id",
                "ix_search_space_memberships_user_id",
                "ix_search_space_memberships_created_at",
                "ix_search_space_memberships_id",
            ],
        ),
        (
            "search_space_invites",
            [
                "ix_search_space_invites_invite_code",
                "ix_search_space_invites_created_at",
                "ix_search_space_invites_id",
            ],
        ),
        (
            "search_space_roles",
            [
                "ix_search_space_roles_name",
                "ix_search_space_roles_created_at",
                "ix_search_space_roles_id",
            ],
        ),
    ]
    for table_name, index_names in index_drops:
        existing = {idx["name"] for idx in inspector.get_indexes(table_name)}
        for index_name in index_names:
            if index_name in existing:
                op.drop_index(index_name, table_name=table_name)

    # Drop tables in correct order (memberships reference invites and roles).
    op.drop_table("search_space_memberships")
    op.drop_table("search_space_invites")
    op.drop_table("search_space_roles")
|
||||
|
|
@ -0,0 +1,63 @@
|
|||
"""Move LLM preferences from user-level to search space level
|
||||
|
||||
Revision ID: 40
|
||||
Revises: 39
|
||||
Create Date: 2024-11-27
|
||||
|
||||
This migration moves LLM preferences (long_context_llm_id, fast_llm_id, strategic_llm_id)
|
||||
from the user_search_space_preferences table to the searchspaces table itself.
|
||||
|
||||
This change supports the RBAC model where LLM preferences are shared by all members
|
||||
of a search space, rather than being per-user.
|
||||
"""
|
||||
|
||||
import sqlalchemy as sa
|
||||
|
||||
from alembic import op
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "40"
|
||||
down_revision = "39"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade():
    """Add per-search-space LLM preference columns and backfill them.

    The three nullable integer columns are added to searchspaces, then each
    space inherits its owner's values from user_search_space_preferences.
    """
    # Add the LLM preference columns to the searchspaces table.
    for preference_column in (
        "long_context_llm_id",
        "fast_llm_id",
        "strategic_llm_id",
    ):
        op.add_column(
            "searchspaces",
            sa.Column(preference_column, sa.Integer(), nullable=True),
        )

    # Backfill from the owner's row (the user who created the search space)
    # in user_search_space_preferences.
    connection = op.get_bind()
    connection.execute(
        sa.text("""
        UPDATE searchspaces ss
        SET
            long_context_llm_id = usp.long_context_llm_id,
            fast_llm_id = usp.fast_llm_id,
            strategic_llm_id = usp.strategic_llm_id
        FROM user_search_space_preferences usp
        WHERE ss.id = usp.search_space_id
        AND ss.user_id = usp.user_id
        """)
    )
|
||||
|
||||
|
||||
def downgrade():
    """Drop the LLM preference columns from searchspaces (reverse of upgrade)."""
    for preference_column in (
        "strategic_llm_id",
        "fast_llm_id",
        "long_context_llm_id",
    ):
        op.drop_column("searchspaces", preference_column)
|
||||
|
|
@ -0,0 +1,212 @@
|
|||
"""Backfill RBAC data for existing search spaces
|
||||
|
||||
Revision ID: 41
|
||||
Revises: 40
|
||||
Create Date: 2025-11-28
|
||||
|
||||
This migration creates default roles and owner memberships for all existing
|
||||
search spaces that were created before the RBAC system was implemented.
|
||||
"""
|
||||
|
||||
import sqlalchemy as sa
|
||||
|
||||
from alembic import op
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "41"
|
||||
down_revision = "40"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
# Default role permissions (must match DEFAULT_ROLE_PERMISSIONS in db.py)
|
||||
DEFAULT_ROLES = [
|
||||
{
|
||||
"name": "Owner",
|
||||
"description": "Full access to all resources",
|
||||
"permissions": ["*"],
|
||||
"is_system_role": True,
|
||||
"is_default": False,
|
||||
},
|
||||
{
|
||||
"name": "Admin",
|
||||
"description": "Can manage members, roles, and all content",
|
||||
"permissions": [
|
||||
"documents:create",
|
||||
"documents:read",
|
||||
"documents:update",
|
||||
"documents:delete",
|
||||
"chats:create",
|
||||
"chats:read",
|
||||
"chats:update",
|
||||
"chats:delete",
|
||||
"llm_configs:create",
|
||||
"llm_configs:read",
|
||||
"llm_configs:update",
|
||||
"llm_configs:delete",
|
||||
"logs:read",
|
||||
"logs:delete",
|
||||
"podcasts:create",
|
||||
"podcasts:read",
|
||||
"podcasts:update",
|
||||
"podcasts:delete",
|
||||
"connectors:create",
|
||||
"connectors:read",
|
||||
"connectors:update",
|
||||
"connectors:delete",
|
||||
"members:read",
|
||||
"members:update",
|
||||
"members:delete",
|
||||
"roles:create",
|
||||
"roles:read",
|
||||
"roles:update",
|
||||
"roles:delete",
|
||||
"invites:create",
|
||||
"invites:read",
|
||||
"invites:delete",
|
||||
"settings:read",
|
||||
"settings:update",
|
||||
],
|
||||
"is_system_role": True,
|
||||
"is_default": False,
|
||||
},
|
||||
{
|
||||
"name": "Editor",
|
||||
"description": "Can create and edit content",
|
||||
"permissions": [
|
||||
"documents:create",
|
||||
"documents:read",
|
||||
"documents:update",
|
||||
"chats:create",
|
||||
"chats:read",
|
||||
"chats:update",
|
||||
"llm_configs:read",
|
||||
"logs:read",
|
||||
"podcasts:create",
|
||||
"podcasts:read",
|
||||
"podcasts:update",
|
||||
"connectors:create",
|
||||
"connectors:read",
|
||||
"connectors:update",
|
||||
"members:read",
|
||||
"roles:read",
|
||||
],
|
||||
"is_system_role": True,
|
||||
"is_default": True,
|
||||
},
|
||||
{
|
||||
"name": "Viewer",
|
||||
"description": "Read-only access to content",
|
||||
"permissions": [
|
||||
"documents:read",
|
||||
"chats:read",
|
||||
"llm_configs:read",
|
||||
"logs:read",
|
||||
"podcasts:read",
|
||||
"connectors:read",
|
||||
"members:read",
|
||||
"roles:read",
|
||||
],
|
||||
"is_system_role": True,
|
||||
"is_default": False,
|
||||
},
|
||||
]
|
||||
|
||||
|
||||
def upgrade():
    """Backfill default roles and owner memberships for pre-RBAC search spaces.

    For every search space that has no roles yet, creates the DEFAULT_ROLES
    set and grants the space's creator an owner membership. Safe to re-run:
    spaces with roles are skipped and memberships are only inserted when
    absent.
    """
    connection = op.get_bind()

    # Get all existing search spaces that don't have roles yet
    search_spaces = connection.execute(
        sa.text("""
        SELECT ss.id, ss.user_id
        FROM searchspaces ss
        WHERE NOT EXISTS (
            SELECT 1 FROM search_space_roles ssr
            WHERE ssr.search_space_id = ss.id
        )
        """)
    ).fetchall()

    for ss_id, owner_user_id in search_spaces:
        owner_role_id = None

        # Create default roles for each search space
        for role in DEFAULT_ROLES:
            result = connection.execute(
                sa.text("""
                INSERT INTO search_space_roles
                (name, description, permissions, is_default, is_system_role, search_space_id)
                VALUES (:name, :description, :permissions, :is_default, :is_system_role, :search_space_id)
                RETURNING id
                """),
                {
                    "name": role["name"],
                    "description": role["description"],
                    # Bind the list directly: psycopg2 adapts a Python list to
                    # a PostgreSQL array, avoiding the quote-fragile f-string
                    # ARRAY[...] literal the previous version spliced into the
                    # SQL text.
                    "permissions": role["permissions"],
                    "is_default": role["is_default"],
                    "is_system_role": role["is_system_role"],
                    "search_space_id": ss_id,
                },
            )
            role_id = result.fetchone()[0]

            # Keep track of Owner role ID
            if role["name"] == "Owner":
                owner_role_id = role_id

        # Create owner membership for the search space creator
        if owner_user_id and owner_role_id:
            # Check if membership already exists
            existing = connection.execute(
                sa.text("""
                SELECT 1 FROM search_space_memberships
                WHERE user_id = :user_id AND search_space_id = :search_space_id
                """),
                {"user_id": owner_user_id, "search_space_id": ss_id},
            ).fetchone()

            if not existing:
                connection.execute(
                    sa.text("""
                    INSERT INTO search_space_memberships
                    (user_id, search_space_id, role_id, is_owner)
                    VALUES (:user_id, :search_space_id, :role_id, TRUE)
                    """),
                    {
                        "user_id": owner_user_id,
                        "search_space_id": ss_id,
                        "role_id": owner_role_id,
                    },
                )
|
||||
|
||||
|
||||
def downgrade():
    """Remove the RBAC data this migration auto-created.

    The upgrade only inserts data, so this deletes just what it could have
    produced: owner memberships tied to the system 'Owner' role, then every
    system role. Manually created roles and memberships are left intact to
    avoid destroying user data.
    """
    connection = op.get_bind()

    # First drop the auto-created owner memberships...
    connection.execute(
        sa.text("""
        DELETE FROM search_space_memberships ssm
        USING search_space_roles ssr
        WHERE ssm.role_id = ssr.id
        AND ssm.is_owner = TRUE
        AND ssr.is_system_role = TRUE
        AND ssr.name = 'Owner'
        """)
    )

    # ...then the system roles themselves.
    connection.execute(
        sa.text("""
        DELETE FROM search_space_roles
        WHERE is_system_role = TRUE
        """)
    )
|
||||
|
|
@ -0,0 +1,52 @@
|
|||
"""Drop user_search_space_preferences table
|
||||
|
||||
Revision ID: 42
|
||||
Revises: 41
|
||||
Create Date: 2025-11-28
|
||||
|
||||
This table is no longer needed after RBAC implementation:
|
||||
- LLM preferences are now stored on SearchSpace directly
|
||||
- User-SearchSpace relationships are handled by SearchSpaceMembership
|
||||
"""
|
||||
|
||||
import sqlalchemy as sa
|
||||
|
||||
from alembic import op
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "42"
|
||||
down_revision = "41"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade():
    """Drop user_search_space_preferences; it is superseded by RBAC tables."""
    op.drop_table("user_search_space_preferences")
|
||||
|
||||
|
||||
def downgrade():
    """Recreate the user_search_space_preferences table for rollback."""
    # Column definitions, in the original table order.
    columns = [
        sa.Column("id", sa.Integer(), primary_key=True),
        sa.Column(
            "created_at", sa.DateTime(timezone=True), server_default=sa.func.now()
        ),
        sa.Column(
            "user_id",
            sa.UUID(),
            sa.ForeignKey("user.id", ondelete="CASCADE"),
            nullable=False,
        ),
        sa.Column(
            "search_space_id",
            sa.Integer(),
            sa.ForeignKey("searchspaces.id", ondelete="CASCADE"),
            nullable=False,
        ),
        sa.Column("long_context_llm_id", sa.Integer(), nullable=True),
        sa.Column("fast_llm_id", sa.Integer(), nullable=True),
        sa.Column("strategic_llm_id", sa.Integer(), nullable=True),
    ]
    op.create_table(
        "user_search_space_preferences",
        *columns,
        sa.UniqueConstraint("user_id", "search_space_id", name="uq_user_searchspace"),
    )
|
||||
|
|
@ -11,7 +11,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
|
|||
# Additional imports for document fetching
|
||||
from sqlalchemy.future import select
|
||||
|
||||
from app.db import Document, SearchSpace
|
||||
from app.db import Document
|
||||
from app.services.connector_service import ConnectorService
|
||||
from app.services.query_service import QueryService
|
||||
|
||||
|
|
@ -92,19 +92,18 @@ def extract_sources_from_documents(
|
|||
|
||||
|
||||
async def fetch_documents_by_ids(
|
||||
document_ids: list[int], user_id: str, db_session: AsyncSession
|
||||
document_ids: list[int], search_space_id: int, db_session: AsyncSession
|
||||
) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
|
||||
"""
|
||||
Fetch documents by their IDs with ownership check using DOCUMENTS mode approach.
|
||||
Fetch documents by their IDs within a search space.
|
||||
|
||||
This function ensures that only documents belonging to the user are fetched,
|
||||
providing security by checking ownership through SearchSpace association.
|
||||
This function ensures that only documents belonging to the search space are fetched.
|
||||
Similar to SearchMode.DOCUMENTS, it fetches full documents and concatenates their chunks.
|
||||
Also creates source objects for UI display, grouped by document type.
|
||||
|
||||
Args:
|
||||
document_ids: List of document IDs to fetch
|
||||
user_id: The user ID to check ownership
|
||||
search_space_id: The search space ID to filter by
|
||||
db_session: The database session
|
||||
|
||||
Returns:
|
||||
|
|
@ -114,11 +113,12 @@ async def fetch_documents_by_ids(
|
|||
return [], []
|
||||
|
||||
try:
|
||||
# Query documents with ownership check
|
||||
# Query documents filtered by search space
|
||||
result = await db_session.execute(
|
||||
select(Document)
|
||||
.join(SearchSpace)
|
||||
.filter(Document.id.in_(document_ids), SearchSpace.user_id == user_id)
|
||||
select(Document).filter(
|
||||
Document.id.in_(document_ids),
|
||||
Document.search_space_id == search_space_id,
|
||||
)
|
||||
)
|
||||
documents = result.scalars().all()
|
||||
|
||||
|
|
@ -515,7 +515,6 @@ async def fetch_documents_by_ids(
|
|||
|
||||
async def fetch_relevant_documents(
|
||||
research_questions: list[str],
|
||||
user_id: str,
|
||||
search_space_id: int,
|
||||
db_session: AsyncSession,
|
||||
connectors_to_search: list[str],
|
||||
|
|
@ -536,7 +535,6 @@ async def fetch_relevant_documents(
|
|||
|
||||
Args:
|
||||
research_questions: List of research questions to find documents for
|
||||
user_id: The user ID
|
||||
search_space_id: The search space ID
|
||||
db_session: The database session
|
||||
connectors_to_search: List of connectors to search
|
||||
|
|
@ -619,7 +617,6 @@ async def fetch_relevant_documents(
|
|||
youtube_chunks,
|
||||
) = await connector_service.search_youtube(
|
||||
user_query=reformulated_query,
|
||||
user_id=user_id,
|
||||
search_space_id=search_space_id,
|
||||
top_k=top_k,
|
||||
search_mode=search_mode,
|
||||
|
|
@ -646,7 +643,6 @@ async def fetch_relevant_documents(
|
|||
extension_chunks,
|
||||
) = await connector_service.search_extension(
|
||||
user_query=reformulated_query,
|
||||
user_id=user_id,
|
||||
search_space_id=search_space_id,
|
||||
top_k=top_k,
|
||||
search_mode=search_mode,
|
||||
|
|
@ -673,7 +669,6 @@ async def fetch_relevant_documents(
|
|||
crawled_urls_chunks,
|
||||
) = await connector_service.search_crawled_urls(
|
||||
user_query=reformulated_query,
|
||||
user_id=user_id,
|
||||
search_space_id=search_space_id,
|
||||
top_k=top_k,
|
||||
search_mode=search_mode,
|
||||
|
|
@ -689,7 +684,7 @@ async def fetch_relevant_documents(
|
|||
writer(
|
||||
{
|
||||
"yield_value": streaming_service.format_terminal_info_delta(
|
||||
f"🌐 Found {len(crawled_urls_chunks)} Web Pages chunks related to your query"
|
||||
f"🌐 Found {len(crawled_urls_chunks)} Web Page chunks related to your query"
|
||||
)
|
||||
}
|
||||
)
|
||||
|
|
@ -697,7 +692,6 @@ async def fetch_relevant_documents(
|
|||
elif connector == "FILE":
|
||||
source_object, files_chunks = await connector_service.search_files(
|
||||
user_query=reformulated_query,
|
||||
user_id=user_id,
|
||||
search_space_id=search_space_id,
|
||||
top_k=top_k,
|
||||
search_mode=search_mode,
|
||||
|
|
@ -721,7 +715,6 @@ async def fetch_relevant_documents(
|
|||
elif connector == "SLACK_CONNECTOR":
|
||||
source_object, slack_chunks = await connector_service.search_slack(
|
||||
user_query=reformulated_query,
|
||||
user_id=user_id,
|
||||
search_space_id=search_space_id,
|
||||
top_k=top_k,
|
||||
search_mode=search_mode,
|
||||
|
|
@ -748,7 +741,6 @@ async def fetch_relevant_documents(
|
|||
notion_chunks,
|
||||
) = await connector_service.search_notion(
|
||||
user_query=reformulated_query,
|
||||
user_id=user_id,
|
||||
search_space_id=search_space_id,
|
||||
top_k=top_k,
|
||||
search_mode=search_mode,
|
||||
|
|
@ -775,7 +767,6 @@ async def fetch_relevant_documents(
|
|||
github_chunks,
|
||||
) = await connector_service.search_github(
|
||||
user_query=reformulated_query,
|
||||
user_id=user_id,
|
||||
search_space_id=search_space_id,
|
||||
top_k=top_k,
|
||||
search_mode=search_mode,
|
||||
|
|
@ -802,7 +793,6 @@ async def fetch_relevant_documents(
|
|||
linear_chunks,
|
||||
) = await connector_service.search_linear(
|
||||
user_query=reformulated_query,
|
||||
user_id=user_id,
|
||||
search_space_id=search_space_id,
|
||||
top_k=top_k,
|
||||
search_mode=search_mode,
|
||||
|
|
@ -829,7 +819,6 @@ async def fetch_relevant_documents(
|
|||
tavily_chunks,
|
||||
) = await connector_service.search_tavily(
|
||||
user_query=reformulated_query,
|
||||
user_id=user_id,
|
||||
search_space_id=search_space_id,
|
||||
top_k=top_k,
|
||||
)
|
||||
|
|
@ -855,7 +844,6 @@ async def fetch_relevant_documents(
|
|||
searx_chunks,
|
||||
) = await connector_service.search_searxng(
|
||||
user_query=reformulated_query,
|
||||
user_id=user_id,
|
||||
search_space_id=search_space_id,
|
||||
top_k=top_k,
|
||||
)
|
||||
|
|
@ -881,7 +869,6 @@ async def fetch_relevant_documents(
|
|||
linkup_chunks,
|
||||
) = await connector_service.search_linkup(
|
||||
user_query=reformulated_query,
|
||||
user_id=user_id,
|
||||
search_space_id=search_space_id,
|
||||
mode=linkup_mode,
|
||||
)
|
||||
|
|
@ -907,7 +894,6 @@ async def fetch_relevant_documents(
|
|||
baidu_chunks,
|
||||
) = await connector_service.search_baidu(
|
||||
user_query=reformulated_query,
|
||||
user_id=user_id,
|
||||
search_space_id=search_space_id,
|
||||
top_k=top_k,
|
||||
)
|
||||
|
|
@ -933,7 +919,6 @@ async def fetch_relevant_documents(
|
|||
discord_chunks,
|
||||
) = await connector_service.search_discord(
|
||||
user_query=reformulated_query,
|
||||
user_id=user_id,
|
||||
search_space_id=search_space_id,
|
||||
top_k=top_k,
|
||||
search_mode=search_mode,
|
||||
|
|
@ -955,7 +940,6 @@ async def fetch_relevant_documents(
|
|||
elif connector == "JIRA_CONNECTOR":
|
||||
source_object, jira_chunks = await connector_service.search_jira(
|
||||
user_query=reformulated_query,
|
||||
user_id=user_id,
|
||||
search_space_id=search_space_id,
|
||||
top_k=top_k,
|
||||
search_mode=search_mode,
|
||||
|
|
@ -981,7 +965,6 @@ async def fetch_relevant_documents(
|
|||
calendar_chunks,
|
||||
) = await connector_service.search_google_calendar(
|
||||
user_query=reformulated_query,
|
||||
user_id=user_id,
|
||||
search_space_id=search_space_id,
|
||||
top_k=top_k,
|
||||
search_mode=search_mode,
|
||||
|
|
@ -1007,7 +990,6 @@ async def fetch_relevant_documents(
|
|||
airtable_chunks,
|
||||
) = await connector_service.search_airtable(
|
||||
user_query=reformulated_query,
|
||||
user_id=user_id,
|
||||
search_space_id=search_space_id,
|
||||
top_k=top_k,
|
||||
search_mode=search_mode,
|
||||
|
|
@ -1033,7 +1015,6 @@ async def fetch_relevant_documents(
|
|||
gmail_chunks,
|
||||
) = await connector_service.search_google_gmail(
|
||||
user_query=reformulated_query,
|
||||
user_id=user_id,
|
||||
search_space_id=search_space_id,
|
||||
top_k=top_k,
|
||||
search_mode=search_mode,
|
||||
|
|
@ -1059,7 +1040,6 @@ async def fetch_relevant_documents(
|
|||
confluence_chunks,
|
||||
) = await connector_service.search_confluence(
|
||||
user_query=reformulated_query,
|
||||
user_id=user_id,
|
||||
search_space_id=search_space_id,
|
||||
top_k=top_k,
|
||||
search_mode=search_mode,
|
||||
|
|
@ -1085,7 +1065,6 @@ async def fetch_relevant_documents(
|
|||
clickup_chunks,
|
||||
) = await connector_service.search_clickup(
|
||||
user_query=reformulated_query,
|
||||
user_id=user_id,
|
||||
search_space_id=search_space_id,
|
||||
top_k=top_k,
|
||||
search_mode=search_mode,
|
||||
|
|
@ -1112,7 +1091,6 @@ async def fetch_relevant_documents(
|
|||
luma_chunks,
|
||||
) = await connector_service.search_luma(
|
||||
user_query=reformulated_query,
|
||||
user_id=user_id,
|
||||
search_space_id=search_space_id,
|
||||
top_k=top_k,
|
||||
search_mode=search_mode,
|
||||
|
|
@ -1139,7 +1117,6 @@ async def fetch_relevant_documents(
|
|||
elasticsearch_chunks,
|
||||
) = await connector_service.search_elasticsearch(
|
||||
user_query=reformulated_query,
|
||||
user_id=user_id,
|
||||
search_space_id=search_space_id,
|
||||
top_k=top_k,
|
||||
search_mode=search_mode,
|
||||
|
|
@ -1315,7 +1292,6 @@ async def reformulate_user_query(
|
|||
reformulated_query = await QueryService.reformulate_query_with_chat_history(
|
||||
user_query=user_query,
|
||||
session=state.db_session,
|
||||
user_id=configuration.user_id,
|
||||
search_space_id=configuration.search_space_id,
|
||||
chat_history_str=chat_history_str,
|
||||
)
|
||||
|
|
@ -1389,7 +1365,7 @@ async def handle_qna_workflow(
|
|||
user_selected_documents,
|
||||
) = await fetch_documents_by_ids(
|
||||
document_ids=configuration.document_ids_to_add_in_context,
|
||||
user_id=configuration.user_id,
|
||||
search_space_id=configuration.search_space_id,
|
||||
db_session=state.db_session,
|
||||
)
|
||||
|
||||
|
|
@ -1404,7 +1380,7 @@ async def handle_qna_workflow(
|
|||
|
||||
# Create connector service using state db_session
|
||||
connector_service = ConnectorService(
|
||||
state.db_session, user_id=configuration.user_id
|
||||
state.db_session, search_space_id=configuration.search_space_id
|
||||
)
|
||||
await connector_service.initialize_counter()
|
||||
|
||||
|
|
@ -1413,7 +1389,6 @@ async def handle_qna_workflow(
|
|||
|
||||
relevant_documents = await fetch_relevant_documents(
|
||||
research_questions=research_questions,
|
||||
user_id=configuration.user_id,
|
||||
search_space_id=configuration.search_space_id,
|
||||
db_session=state.db_session,
|
||||
connectors_to_search=configuration.connectors_to_search,
|
||||
|
|
@ -1459,7 +1434,6 @@ async def handle_qna_workflow(
|
|||
"user_query": user_query, # Use the reformulated query
|
||||
"reformulated_query": reformulated_query,
|
||||
"relevant_documents": all_documents, # Use combined documents
|
||||
"user_id": configuration.user_id,
|
||||
"search_space_id": configuration.search_space_id,
|
||||
"language": configuration.language,
|
||||
}
|
||||
|
|
@ -1551,12 +1525,11 @@ async def generate_further_questions(
|
|||
Returns:
|
||||
Dict containing the further questions in the "further_questions" key for state update.
|
||||
"""
|
||||
from app.services.llm_service import get_user_fast_llm
|
||||
from app.services.llm_service import get_fast_llm
|
||||
|
||||
# Get configuration and state data
|
||||
configuration = Configuration.from_runnable_config(config)
|
||||
chat_history = state.chat_history
|
||||
user_id = configuration.user_id
|
||||
search_space_id = configuration.search_space_id
|
||||
streaming_service = state.streaming_service
|
||||
|
||||
|
|
@ -1571,10 +1544,10 @@ async def generate_further_questions(
|
|||
}
|
||||
)
|
||||
|
||||
# Get user's fast LLM
|
||||
llm = await get_user_fast_llm(state.db_session, user_id, search_space_id)
|
||||
# Get search space's fast LLM
|
||||
llm = await get_fast_llm(state.db_session, search_space_id)
|
||||
if not llm:
|
||||
error_message = f"No fast LLM configured for user {user_id} in search space {search_space_id}"
|
||||
error_message = f"No fast LLM configured for search space {search_space_id}"
|
||||
print(error_message)
|
||||
writer({"yield_value": streaming_service.format_error(error_message)})
|
||||
|
||||
|
|
|
|||
|
|
@ -18,7 +18,6 @@ class Configuration:
|
|||
relevant_documents: list[
|
||||
Any
|
||||
] # Documents provided directly to the agent for answering
|
||||
user_id: str # User identifier
|
||||
search_space_id: int # Search space identifier
|
||||
language: str | None = None # Language for responses
|
||||
|
||||
|
|
|
|||
|
|
@ -17,7 +17,6 @@ You are SurfSense, an advanced AI research assistant that provides detailed, wel
|
|||
{chat_history_section}
|
||||
<knowledge_sources>
|
||||
- EXTENSION: "Web content saved via SurfSense browser extension" (personal browsing history)
|
||||
- CRAWLED_URL: "Webpages indexed by SurfSense web crawler" (personally selected websites)
|
||||
- FILE: "User-uploaded documents (PDFs, Word, etc.)" (personal files)
|
||||
- SLACK_CONNECTOR: "Slack conversations and shared content" (personal workspace communications)
|
||||
- NOTION_CONNECTOR: "Notion workspace pages and databases" (personal knowledge management)
|
||||
|
|
@ -35,6 +34,7 @@ You are SurfSense, an advanced AI research assistant that provides detailed, wel
|
|||
- TAVILY_API: "Tavily search API results" (personalized search results)
|
||||
- LINKUP_API: "Linkup search API results" (personalized search results)
|
||||
- LUMA_CONNECTOR: "Luma events"
|
||||
- WEBCRAWLER_CONNECTOR: "Webpages indexed by SurfSense" (personally selected websites)
|
||||
</knowledge_sources>
|
||||
|
||||
<instructions>
|
||||
|
|
|
|||
|
|
@ -142,13 +142,12 @@ async def answer_question(state: State, config: RunnableConfig) -> dict[str, Any
|
|||
Returns:
|
||||
Dict containing the final answer in the "final_answer" key.
|
||||
"""
|
||||
from app.services.llm_service import get_user_fast_llm
|
||||
from app.services.llm_service import get_fast_llm
|
||||
|
||||
# Get configuration and relevant documents from configuration
|
||||
configuration = Configuration.from_runnable_config(config)
|
||||
documents = state.reranked_documents
|
||||
user_query = configuration.user_query
|
||||
user_id = configuration.user_id
|
||||
search_space_id = configuration.search_space_id
|
||||
language = configuration.language
|
||||
|
||||
|
|
@ -178,10 +177,10 @@ async def answer_question(state: State, config: RunnableConfig) -> dict[str, Any
|
|||
else ""
|
||||
)
|
||||
|
||||
# Get user's fast LLM
|
||||
llm = await get_user_fast_llm(state.db_session, user_id, search_space_id)
|
||||
# Get search space's fast LLM
|
||||
llm = await get_fast_llm(state.db_session, search_space_id)
|
||||
if not llm:
|
||||
error_message = f"No fast LLM configured for user {user_id} in search space {search_space_id}"
|
||||
error_message = f"No fast LLM configured for search space {search_space_id}"
|
||||
print(error_message)
|
||||
raise RuntimeError(error_message)
|
||||
|
||||
|
|
|
|||
|
|
@ -19,7 +19,6 @@ def get_connector_emoji(connector_name: str) -> str:
|
|||
connector_emojis = {
|
||||
"YOUTUBE_VIDEO": "📹",
|
||||
"EXTENSION": "🧩",
|
||||
"CRAWLED_URL": "🌐",
|
||||
"FILE": "📄",
|
||||
"SLACK_CONNECTOR": "💬",
|
||||
"NOTION_CONNECTOR": "📘",
|
||||
|
|
@ -34,6 +33,7 @@ def get_connector_emoji(connector_name: str) -> str:
|
|||
"AIRTABLE_CONNECTOR": "🗃️",
|
||||
"LUMA_CONNECTOR": "✨",
|
||||
"ELASTICSEARCH_CONNECTOR": "⚡",
|
||||
"WEBCRAWLER_CONNECTOR": "🌐",
|
||||
}
|
||||
return connector_emojis.get(connector_name, "🔎")
|
||||
|
||||
|
|
@ -43,7 +43,6 @@ def get_connector_friendly_name(connector_name: str) -> str:
|
|||
connector_friendly_names = {
|
||||
"YOUTUBE_VIDEO": "YouTube",
|
||||
"EXTENSION": "Browser Extension",
|
||||
"CRAWLED_URL": "Web Pages",
|
||||
"FILE": "Files",
|
||||
"SLACK_CONNECTOR": "Slack",
|
||||
"NOTION_CONNECTOR": "Notion",
|
||||
|
|
@ -59,6 +58,7 @@ def get_connector_friendly_name(connector_name: str) -> str:
|
|||
"AIRTABLE_CONNECTOR": "Airtable",
|
||||
"LUMA_CONNECTOR": "Luma",
|
||||
"ELASTICSEARCH_CONNECTOR": "Elasticsearch",
|
||||
"WEBCRAWLER_CONNECTOR": "Web Pages",
|
||||
}
|
||||
return connector_friendly_names.get(connector_name, connector_name)
|
||||
|
||||
|
|
|
|||
|
|
@ -208,9 +208,6 @@ class Config:
|
|||
# LlamaCloud API Key
|
||||
LLAMA_CLOUD_API_KEY = os.getenv("LLAMA_CLOUD_API_KEY")
|
||||
|
||||
# Firecrawl API Key
|
||||
FIRECRAWL_API_KEY = os.getenv("FIRECRAWL_API_KEY", None)
|
||||
|
||||
# Litellm TTS Configuration
|
||||
TTS_SERVICE = os.getenv("TTS_SERVICE")
|
||||
TTS_SERVICE_API_BASE = os.getenv("TTS_SERVICE_API_BASE")
|
||||
|
|
|
|||
188
surfsense_backend/app/connectors/webcrawler_connector.py
Normal file
188
surfsense_backend/app/connectors/webcrawler_connector.py
Normal file
|
|
@ -0,0 +1,188 @@
|
|||
"""
|
||||
WebCrawler Connector Module
|
||||
|
||||
A module for crawling web pages and extracting content using Firecrawl or AsyncChromiumLoader.
|
||||
Provides a unified interface for web scraping.
|
||||
"""
|
||||
|
||||
from typing import Any
|
||||
|
||||
import validators
|
||||
from firecrawl import AsyncFirecrawlApp
|
||||
from langchain_community.document_loaders import AsyncChromiumLoader
|
||||
|
||||
|
||||
class WebCrawlerConnector:
|
||||
"""Class for crawling web pages and extracting content."""
|
||||
|
||||
def __init__(self, firecrawl_api_key: str | None = None):
|
||||
"""
|
||||
Initialize the WebCrawlerConnector class.
|
||||
|
||||
Args:
|
||||
firecrawl_api_key: Firecrawl API key (optional, will use AsyncChromiumLoader if not provided)
|
||||
"""
|
||||
self.firecrawl_api_key = firecrawl_api_key
|
||||
self.use_firecrawl = bool(firecrawl_api_key)
|
||||
|
||||
def set_api_key(self, api_key: str) -> None:
|
||||
"""
|
||||
Set the Firecrawl API key and enable Firecrawl usage.
|
||||
|
||||
Args:
|
||||
api_key: Firecrawl API key
|
||||
"""
|
||||
self.firecrawl_api_key = api_key
|
||||
self.use_firecrawl = True
|
||||
|
||||
async def crawl_url(
|
||||
self, url: str, formats: list[str] | None = None
|
||||
) -> tuple[dict[str, Any] | None, str | None]:
|
||||
"""
|
||||
Crawl a single URL and extract its content.
|
||||
|
||||
Args:
|
||||
url: URL to crawl
|
||||
formats: List of formats to extract (e.g., ["markdown", "html"]) - only for Firecrawl
|
||||
|
||||
Returns:
|
||||
Tuple containing (crawl result dict, error message or None)
|
||||
Result dict contains:
|
||||
- content: Extracted content (markdown or HTML)
|
||||
- metadata: Page metadata (title, description, etc.)
|
||||
- source: Original URL
|
||||
- crawler_type: Type of crawler used
|
||||
"""
|
||||
try:
|
||||
# Validate URL
|
||||
if not validators.url(url):
|
||||
return None, f"Invalid URL: {url}"
|
||||
|
||||
if self.use_firecrawl:
|
||||
result = await self._crawl_with_firecrawl(url, formats)
|
||||
else:
|
||||
result = await self._crawl_with_chromium(url)
|
||||
|
||||
return result, None
|
||||
|
||||
except Exception as e:
|
||||
return None, f"Error crawling URL {url}: {e!s}"
|
||||
|
||||
async def _crawl_with_firecrawl(
|
||||
self, url: str, formats: list[str] | None = None
|
||||
) -> dict[str, Any]:
|
||||
"""
|
||||
Crawl URL using Firecrawl.
|
||||
|
||||
Args:
|
||||
url: URL to crawl
|
||||
formats: List of formats to extract
|
||||
|
||||
Returns:
|
||||
Dict containing crawled content and metadata
|
||||
|
||||
Raises:
|
||||
ValueError: If Firecrawl scraping fails
|
||||
"""
|
||||
if not self.firecrawl_api_key:
|
||||
raise ValueError("Firecrawl API key not set. Call set_api_key() first.")
|
||||
|
||||
firecrawl_app = AsyncFirecrawlApp(api_key=self.firecrawl_api_key)
|
||||
|
||||
# Default to markdown format
|
||||
if formats is None:
|
||||
formats = ["markdown"]
|
||||
|
||||
# v2 API returns Document directly and raises an exception on failure
|
||||
scrape_result = await firecrawl_app.scrape(url, formats=formats)
|
||||
|
||||
if not scrape_result:
|
||||
raise ValueError("Firecrawl returned no result")
|
||||
|
||||
# Extract content based on format
|
||||
content = scrape_result.markdown or scrape_result.html or ""
|
||||
|
||||
# Extract metadata - v2 returns DocumentMetadata object
|
||||
metadata_obj = scrape_result.metadata
|
||||
metadata = metadata_obj.model_dump() if metadata_obj else {}
|
||||
|
||||
return {
|
||||
"content": content,
|
||||
"metadata": {
|
||||
"source": url,
|
||||
"title": metadata.get("title", url),
|
||||
"description": metadata.get("description", ""),
|
||||
"language": metadata.get("language", ""),
|
||||
"sourceURL": metadata.get("source_url", url),
|
||||
**metadata,
|
||||
},
|
||||
"crawler_type": "firecrawl",
|
||||
}
|
||||
|
||||
async def _crawl_with_chromium(self, url: str) -> dict[str, Any]:
|
||||
"""
|
||||
Crawl URL using AsyncChromiumLoader.
|
||||
|
||||
Args:
|
||||
url: URL to crawl
|
||||
|
||||
Returns:
|
||||
Dict containing crawled content and metadata
|
||||
|
||||
Raises:
|
||||
Exception: If crawling fails
|
||||
"""
|
||||
crawl_loader = AsyncChromiumLoader(urls=[url], headless=True)
|
||||
documents = await crawl_loader.aload()
|
||||
|
||||
if not documents:
|
||||
raise ValueError(f"Failed to load content from {url}")
|
||||
|
||||
doc = documents[0]
|
||||
|
||||
# Extract basic metadata from the document
|
||||
metadata = doc.metadata if doc.metadata else {}
|
||||
|
||||
return {
|
||||
"content": doc.page_content,
|
||||
"metadata": {
|
||||
"source": url,
|
||||
"title": metadata.get("title", url),
|
||||
**metadata,
|
||||
},
|
||||
"crawler_type": "chromium",
|
||||
}
|
||||
|
||||
def format_to_structured_document(self, crawl_result: dict[str, Any]) -> str:
|
||||
"""
|
||||
Format crawl result as a structured document.
|
||||
|
||||
Args:
|
||||
crawl_result: Result from crawl_url method
|
||||
|
||||
Returns:
|
||||
Structured document string
|
||||
"""
|
||||
metadata = crawl_result["metadata"]
|
||||
content = crawl_result["content"]
|
||||
|
||||
document_parts = ["<DOCUMENT>", "<METADATA>"]
|
||||
|
||||
# Add all metadata fields
|
||||
for key, value in metadata.items():
|
||||
document_parts.append(f"{key.upper()}: {value}")
|
||||
|
||||
document_parts.extend(
|
||||
[
|
||||
"</METADATA>",
|
||||
"<CONTENT>",
|
||||
"FORMAT: markdown",
|
||||
"TEXT_START",
|
||||
content,
|
||||
"TEXT_END",
|
||||
"</CONTENT>",
|
||||
"</DOCUMENT>",
|
||||
]
|
||||
)
|
||||
|
||||
return "\n".join(document_parts)
|
||||
|
|
@ -73,6 +73,7 @@ class SearchSourceConnectorType(str, Enum):
|
|||
AIRTABLE_CONNECTOR = "AIRTABLE_CONNECTOR"
|
||||
LUMA_CONNECTOR = "LUMA_CONNECTOR"
|
||||
ELASTICSEARCH_CONNECTOR = "ELASTICSEARCH_CONNECTOR"
|
||||
WEBCRAWLER_CONNECTOR = "WEBCRAWLER_CONNECTOR"
|
||||
|
||||
|
||||
class ChatType(str, Enum):
|
||||
|
|
@ -130,6 +131,169 @@ class LogStatus(str, Enum):
|
|||
FAILED = "FAILED"
|
||||
|
||||
|
||||
class Permission(str, Enum):
|
||||
"""
|
||||
Granular permissions for search space resources.
|
||||
Use '*' (FULL_ACCESS) to grant all permissions.
|
||||
"""
|
||||
|
||||
# Documents
|
||||
DOCUMENTS_CREATE = "documents:create"
|
||||
DOCUMENTS_READ = "documents:read"
|
||||
DOCUMENTS_UPDATE = "documents:update"
|
||||
DOCUMENTS_DELETE = "documents:delete"
|
||||
|
||||
# Chats
|
||||
CHATS_CREATE = "chats:create"
|
||||
CHATS_READ = "chats:read"
|
||||
CHATS_UPDATE = "chats:update"
|
||||
CHATS_DELETE = "chats:delete"
|
||||
|
||||
# LLM Configs
|
||||
LLM_CONFIGS_CREATE = "llm_configs:create"
|
||||
LLM_CONFIGS_READ = "llm_configs:read"
|
||||
LLM_CONFIGS_UPDATE = "llm_configs:update"
|
||||
LLM_CONFIGS_DELETE = "llm_configs:delete"
|
||||
|
||||
# Podcasts
|
||||
PODCASTS_CREATE = "podcasts:create"
|
||||
PODCASTS_READ = "podcasts:read"
|
||||
PODCASTS_UPDATE = "podcasts:update"
|
||||
PODCASTS_DELETE = "podcasts:delete"
|
||||
|
||||
# Connectors
|
||||
CONNECTORS_CREATE = "connectors:create"
|
||||
CONNECTORS_READ = "connectors:read"
|
||||
CONNECTORS_UPDATE = "connectors:update"
|
||||
CONNECTORS_DELETE = "connectors:delete"
|
||||
|
||||
# Logs
|
||||
LOGS_READ = "logs:read"
|
||||
LOGS_DELETE = "logs:delete"
|
||||
|
||||
# Members
|
||||
MEMBERS_INVITE = "members:invite"
|
||||
MEMBERS_VIEW = "members:view"
|
||||
MEMBERS_REMOVE = "members:remove"
|
||||
MEMBERS_MANAGE_ROLES = "members:manage_roles"
|
||||
|
||||
# Roles
|
||||
ROLES_CREATE = "roles:create"
|
||||
ROLES_READ = "roles:read"
|
||||
ROLES_UPDATE = "roles:update"
|
||||
ROLES_DELETE = "roles:delete"
|
||||
|
||||
# Search Space Settings
|
||||
SETTINGS_VIEW = "settings:view"
|
||||
SETTINGS_UPDATE = "settings:update"
|
||||
SETTINGS_DELETE = "settings:delete" # Delete the entire search space
|
||||
|
||||
# Full access wildcard
|
||||
FULL_ACCESS = "*"
|
||||
|
||||
|
||||
# Predefined role permission sets for convenience
|
||||
DEFAULT_ROLE_PERMISSIONS = {
|
||||
"Owner": [Permission.FULL_ACCESS.value],
|
||||
"Admin": [
|
||||
# Documents
|
||||
Permission.DOCUMENTS_CREATE.value,
|
||||
Permission.DOCUMENTS_READ.value,
|
||||
Permission.DOCUMENTS_UPDATE.value,
|
||||
Permission.DOCUMENTS_DELETE.value,
|
||||
# Chats
|
||||
Permission.CHATS_CREATE.value,
|
||||
Permission.CHATS_READ.value,
|
||||
Permission.CHATS_UPDATE.value,
|
||||
Permission.CHATS_DELETE.value,
|
||||
# LLM Configs
|
||||
Permission.LLM_CONFIGS_CREATE.value,
|
||||
Permission.LLM_CONFIGS_READ.value,
|
||||
Permission.LLM_CONFIGS_UPDATE.value,
|
||||
Permission.LLM_CONFIGS_DELETE.value,
|
||||
# Podcasts
|
||||
Permission.PODCASTS_CREATE.value,
|
||||
Permission.PODCASTS_READ.value,
|
||||
Permission.PODCASTS_UPDATE.value,
|
||||
Permission.PODCASTS_DELETE.value,
|
||||
# Connectors
|
||||
Permission.CONNECTORS_CREATE.value,
|
||||
Permission.CONNECTORS_READ.value,
|
||||
Permission.CONNECTORS_UPDATE.value,
|
||||
Permission.CONNECTORS_DELETE.value,
|
||||
# Logs
|
||||
Permission.LOGS_READ.value,
|
||||
Permission.LOGS_DELETE.value,
|
||||
# Members
|
||||
Permission.MEMBERS_INVITE.value,
|
||||
Permission.MEMBERS_VIEW.value,
|
||||
Permission.MEMBERS_REMOVE.value,
|
||||
Permission.MEMBERS_MANAGE_ROLES.value,
|
||||
# Roles
|
||||
Permission.ROLES_CREATE.value,
|
||||
Permission.ROLES_READ.value,
|
||||
Permission.ROLES_UPDATE.value,
|
||||
Permission.ROLES_DELETE.value,
|
||||
# Settings (no delete)
|
||||
Permission.SETTINGS_VIEW.value,
|
||||
Permission.SETTINGS_UPDATE.value,
|
||||
],
|
||||
"Editor": [
|
||||
# Documents
|
||||
Permission.DOCUMENTS_CREATE.value,
|
||||
Permission.DOCUMENTS_READ.value,
|
||||
Permission.DOCUMENTS_UPDATE.value,
|
||||
Permission.DOCUMENTS_DELETE.value,
|
||||
# Chats
|
||||
Permission.CHATS_CREATE.value,
|
||||
Permission.CHATS_READ.value,
|
||||
Permission.CHATS_UPDATE.value,
|
||||
Permission.CHATS_DELETE.value,
|
||||
# LLM Configs (read only)
|
||||
Permission.LLM_CONFIGS_READ.value,
|
||||
Permission.LLM_CONFIGS_CREATE.value,
|
||||
Permission.LLM_CONFIGS_UPDATE.value,
|
||||
# Podcasts
|
||||
Permission.PODCASTS_CREATE.value,
|
||||
Permission.PODCASTS_READ.value,
|
||||
Permission.PODCASTS_UPDATE.value,
|
||||
Permission.PODCASTS_DELETE.value,
|
||||
# Connectors (full access for editors)
|
||||
Permission.CONNECTORS_CREATE.value,
|
||||
Permission.CONNECTORS_READ.value,
|
||||
Permission.CONNECTORS_UPDATE.value,
|
||||
# Logs
|
||||
Permission.LOGS_READ.value,
|
||||
# Members (view only)
|
||||
Permission.MEMBERS_VIEW.value,
|
||||
# Roles (read only)
|
||||
Permission.ROLES_READ.value,
|
||||
# Settings (view only)
|
||||
Permission.SETTINGS_VIEW.value,
|
||||
],
|
||||
"Viewer": [
|
||||
# Documents (read only)
|
||||
Permission.DOCUMENTS_READ.value,
|
||||
# Chats (read only)
|
||||
Permission.CHATS_READ.value,
|
||||
# LLM Configs (read only)
|
||||
Permission.LLM_CONFIGS_READ.value,
|
||||
# Podcasts (read only)
|
||||
Permission.PODCASTS_READ.value,
|
||||
# Connectors (read only)
|
||||
Permission.CONNECTORS_READ.value,
|
||||
# Logs (read only)
|
||||
Permission.LOGS_READ.value,
|
||||
# Members (view only)
|
||||
Permission.MEMBERS_VIEW.value,
|
||||
# Roles (read only)
|
||||
Permission.ROLES_READ.value,
|
||||
# Settings (view only)
|
||||
Permission.SETTINGS_VIEW.value,
|
||||
],
|
||||
}
|
||||
|
||||
|
||||
class Base(DeclarativeBase):
|
||||
pass
|
||||
|
||||
|
|
@ -240,6 +404,13 @@ class SearchSpace(BaseModel, TimestampMixin):
|
|||
qna_custom_instructions = Column(
|
||||
Text, nullable=True, default=""
|
||||
) # User's custom instructions
|
||||
|
||||
# Search space-level LLM preferences (shared by all members)
|
||||
# Note: These can be negative IDs for global configs (from YAML) or positive IDs for custom configs (from DB)
|
||||
long_context_llm_id = Column(Integer, nullable=True)
|
||||
fast_llm_id = Column(Integer, nullable=True)
|
||||
strategic_llm_id = Column(Integer, nullable=True)
|
||||
|
||||
user_id = Column(
|
||||
UUID(as_uuid=True), ForeignKey("user.id", ondelete="CASCADE"), nullable=False
|
||||
)
|
||||
|
|
@ -281,9 +452,24 @@ class SearchSpace(BaseModel, TimestampMixin):
|
|||
order_by="LLMConfig.id",
|
||||
cascade="all, delete-orphan",
|
||||
)
|
||||
user_preferences = relationship(
|
||||
"UserSearchSpacePreference",
|
||||
|
||||
# RBAC relationships
|
||||
roles = relationship(
|
||||
"SearchSpaceRole",
|
||||
back_populates="search_space",
|
||||
order_by="SearchSpaceRole.id",
|
||||
cascade="all, delete-orphan",
|
||||
)
|
||||
memberships = relationship(
|
||||
"SearchSpaceMembership",
|
||||
back_populates="search_space",
|
||||
order_by="SearchSpaceMembership.id",
|
||||
cascade="all, delete-orphan",
|
||||
)
|
||||
invites = relationship(
|
||||
"SearchSpaceInvite",
|
||||
back_populates="search_space",
|
||||
order_by="SearchSpaceInvite.id",
|
||||
cascade="all, delete-orphan",
|
||||
)
|
||||
|
||||
|
|
@ -347,45 +533,6 @@ class LLMConfig(BaseModel, TimestampMixin):
|
|||
search_space = relationship("SearchSpace", back_populates="llm_configs")
|
||||
|
||||
|
||||
class UserSearchSpacePreference(BaseModel, TimestampMixin):
|
||||
__tablename__ = "user_search_space_preferences"
|
||||
__table_args__ = (
|
||||
UniqueConstraint(
|
||||
"user_id",
|
||||
"search_space_id",
|
||||
name="uq_user_searchspace",
|
||||
),
|
||||
)
|
||||
|
||||
user_id = Column(
|
||||
UUID(as_uuid=True), ForeignKey("user.id", ondelete="CASCADE"), nullable=False
|
||||
)
|
||||
search_space_id = Column(
|
||||
Integer, ForeignKey("searchspaces.id", ondelete="CASCADE"), nullable=False
|
||||
)
|
||||
|
||||
# User-specific LLM preferences for this search space
|
||||
# Note: These can be negative IDs for global configs (from YAML) or positive IDs for custom configs (from DB)
|
||||
# Foreign keys removed to support global configs with negative IDs
|
||||
long_context_llm_id = Column(Integer, nullable=True)
|
||||
fast_llm_id = Column(Integer, nullable=True)
|
||||
strategic_llm_id = Column(Integer, nullable=True)
|
||||
|
||||
# Future RBAC fields can be added here
|
||||
# role = Column(String(50), nullable=True) # e.g., 'owner', 'editor', 'viewer'
|
||||
# permissions = Column(JSON, nullable=True)
|
||||
|
||||
user = relationship("User", back_populates="search_space_preferences")
|
||||
search_space = relationship("SearchSpace", back_populates="user_preferences")
|
||||
|
||||
# Note: Relationships removed because foreign keys no longer exist
|
||||
# Global configs (negative IDs) don't exist in llm_configs table
|
||||
# Application code manually fetches configs when needed
|
||||
# long_context_llm = relationship("LLMConfig", foreign_keys=[long_context_llm_id], post_update=True)
|
||||
# fast_llm = relationship("LLMConfig", foreign_keys=[fast_llm_id], post_update=True)
|
||||
# strategic_llm = relationship("LLMConfig", foreign_keys=[strategic_llm_id], post_update=True)
|
||||
|
||||
|
||||
class Log(BaseModel, TimestampMixin):
|
||||
__tablename__ = "logs"
|
||||
|
||||
|
|
@ -403,6 +550,140 @@ class Log(BaseModel, TimestampMixin):
|
|||
search_space = relationship("SearchSpace", back_populates="logs")
|
||||
|
||||
|
||||
class SearchSpaceRole(BaseModel, TimestampMixin):
|
||||
"""
|
||||
Custom roles that can be defined per search space.
|
||||
Each search space can have multiple roles with different permission sets.
|
||||
"""
|
||||
|
||||
__tablename__ = "search_space_roles"
|
||||
__table_args__ = (
|
||||
UniqueConstraint(
|
||||
"search_space_id",
|
||||
"name",
|
||||
name="uq_searchspace_role_name",
|
||||
),
|
||||
)
|
||||
|
||||
name = Column(String(100), nullable=False, index=True)
|
||||
description = Column(String(500), nullable=True)
|
||||
# List of Permission enum values (e.g., ["documents:read", "chats:create"])
|
||||
permissions = Column(ARRAY(String), nullable=False, default=[])
|
||||
# Whether this role is assigned to new members by default when they join via invite
|
||||
is_default = Column(Boolean, nullable=False, default=False)
|
||||
# System roles (Owner, Admin, Editor, Viewer) cannot be deleted
|
||||
is_system_role = Column(Boolean, nullable=False, default=False)
|
||||
|
||||
search_space_id = Column(
|
||||
Integer, ForeignKey("searchspaces.id", ondelete="CASCADE"), nullable=False
|
||||
)
|
||||
search_space = relationship("SearchSpace", back_populates="roles")
|
||||
|
||||
memberships = relationship(
|
||||
"SearchSpaceMembership", back_populates="role", passive_deletes=True
|
||||
)
|
||||
invites = relationship(
|
||||
"SearchSpaceInvite", back_populates="role", passive_deletes=True
|
||||
)
|
||||
|
||||
|
||||
class SearchSpaceMembership(BaseModel, TimestampMixin):
|
||||
"""
|
||||
Tracks user membership in search spaces with their assigned role.
|
||||
Each user can be a member of multiple search spaces with different roles.
|
||||
"""
|
||||
|
||||
__tablename__ = "search_space_memberships"
|
||||
__table_args__ = (
|
||||
UniqueConstraint(
|
||||
"user_id",
|
||||
"search_space_id",
|
||||
name="uq_user_searchspace_membership",
|
||||
),
|
||||
)
|
||||
|
||||
user_id = Column(
|
||||
UUID(as_uuid=True), ForeignKey("user.id", ondelete="CASCADE"), nullable=False
|
||||
)
|
||||
search_space_id = Column(
|
||||
Integer, ForeignKey("searchspaces.id", ondelete="CASCADE"), nullable=False
|
||||
)
|
||||
role_id = Column(
|
||||
Integer,
|
||||
ForeignKey("search_space_roles.id", ondelete="SET NULL"),
|
||||
nullable=True,
|
||||
)
|
||||
# Indicates if this user is the original creator/owner of the search space
|
||||
is_owner = Column(Boolean, nullable=False, default=False)
|
||||
# Timestamp when the user joined (via invite or as creator)
|
||||
joined_at = Column(
|
||||
TIMESTAMP(timezone=True),
|
||||
nullable=False,
|
||||
default=lambda: datetime.now(UTC),
|
||||
)
|
||||
# Reference to the invite used to join (null if owner/creator)
|
||||
invited_by_invite_id = Column(
|
||||
Integer,
|
||||
ForeignKey("search_space_invites.id", ondelete="SET NULL"),
|
||||
nullable=True,
|
||||
)
|
||||
|
||||
user = relationship("User", back_populates="search_space_memberships")
|
||||
search_space = relationship("SearchSpace", back_populates="memberships")
|
||||
role = relationship("SearchSpaceRole", back_populates="memberships")
|
||||
invited_by_invite = relationship(
|
||||
"SearchSpaceInvite", back_populates="used_by_memberships"
|
||||
)
|
||||
|
||||
|
||||
class SearchSpaceInvite(BaseModel, TimestampMixin):
|
||||
"""
|
||||
Invite links for search spaces.
|
||||
Users can create invite links with specific roles that others can use to join.
|
||||
"""
|
||||
|
||||
__tablename__ = "search_space_invites"
|
||||
|
||||
# Unique invite code (used in invite URLs)
|
||||
invite_code = Column(String(64), nullable=False, unique=True, index=True)
|
||||
|
||||
search_space_id = Column(
|
||||
Integer, ForeignKey("searchspaces.id", ondelete="CASCADE"), nullable=False
|
||||
)
|
||||
# Role to assign when invite is used (null means use default role)
|
||||
role_id = Column(
|
||||
Integer,
|
||||
ForeignKey("search_space_roles.id", ondelete="SET NULL"),
|
||||
nullable=True,
|
||||
)
|
||||
# User who created this invite
|
||||
created_by_id = Column(
|
||||
UUID(as_uuid=True),
|
||||
ForeignKey("user.id", ondelete="SET NULL"),
|
||||
nullable=True,
|
||||
)
|
||||
|
||||
# Expiration timestamp (null means never expires)
|
||||
expires_at = Column(TIMESTAMP(timezone=True), nullable=True)
|
||||
# Maximum number of times this invite can be used (null means unlimited)
|
||||
max_uses = Column(Integer, nullable=True)
|
||||
# Number of times this invite has been used
|
||||
uses_count = Column(Integer, nullable=False, default=0)
|
||||
# Whether this invite is currently active
|
||||
is_active = Column(Boolean, nullable=False, default=True)
|
||||
# Optional custom name/label for the invite
|
||||
name = Column(String(100), nullable=True)
|
||||
|
||||
search_space = relationship("SearchSpace", back_populates="invites")
|
||||
role = relationship("SearchSpaceRole", back_populates="invites")
|
||||
created_by = relationship("User", back_populates="created_invites")
|
||||
used_by_memberships = relationship(
|
||||
"SearchSpaceMembership",
|
||||
back_populates="invited_by_invite",
|
||||
passive_deletes=True,
|
||||
)
|
||||
|
||||
|
||||
if config.AUTH_TYPE == "GOOGLE":
|
||||
|
||||
class OAuthAccount(SQLAlchemyBaseOAuthAccountTableUUID, Base):
|
||||
|
|
@ -413,11 +694,18 @@ if config.AUTH_TYPE == "GOOGLE":
|
|||
"OAuthAccount", lazy="joined"
|
||||
)
|
||||
search_spaces = relationship("SearchSpace", back_populates="user")
|
||||
search_space_preferences = relationship(
|
||||
"UserSearchSpacePreference",
|
||||
|
||||
# RBAC relationships
|
||||
search_space_memberships = relationship(
|
||||
"SearchSpaceMembership",
|
||||
back_populates="user",
|
||||
cascade="all, delete-orphan",
|
||||
)
|
||||
created_invites = relationship(
|
||||
"SearchSpaceInvite",
|
||||
back_populates="created_by",
|
||||
passive_deletes=True,
|
||||
)
|
||||
|
||||
# Page usage tracking for ETL services
|
||||
pages_limit = Column(Integer, nullable=False, default=500, server_default="500")
|
||||
|
|
@ -427,11 +715,18 @@ else:
|
|||
|
||||
class User(SQLAlchemyBaseUserTableUUID, Base):
|
||||
search_spaces = relationship("SearchSpace", back_populates="user")
|
||||
search_space_preferences = relationship(
|
||||
"UserSearchSpacePreference",
|
||||
|
||||
# RBAC relationships
|
||||
search_space_memberships = relationship(
|
||||
"SearchSpaceMembership",
|
||||
back_populates="user",
|
||||
cascade="all, delete-orphan",
|
||||
)
|
||||
created_invites = relationship(
|
||||
"SearchSpaceInvite",
|
||||
back_populates="created_by",
|
||||
passive_deletes=True,
|
||||
)
|
||||
|
||||
# Page usage tracking for ETL services
|
||||
pages_limit = Column(Integer, nullable=False, default=500, server_default="500")
|
||||
|
|
@ -502,3 +797,109 @@ async def get_documents_hybrid_search_retriever(
|
|||
session: AsyncSession = Depends(get_async_session),
|
||||
):
|
||||
return DocumentHybridSearchRetriever(session)
|
||||
|
||||
|
||||
def has_permission(user_permissions: list[str], required_permission: str) -> bool:
|
||||
"""
|
||||
Check if the user has the required permission.
|
||||
Supports wildcard (*) for full access.
|
||||
|
||||
Args:
|
||||
user_permissions: List of permission strings the user has
|
||||
required_permission: The permission string to check for
|
||||
|
||||
Returns:
|
||||
True if user has the permission, False otherwise
|
||||
"""
|
||||
if not user_permissions:
|
||||
return False
|
||||
|
||||
# Full access wildcard grants all permissions
|
||||
if Permission.FULL_ACCESS.value in user_permissions:
|
||||
return True
|
||||
|
||||
return required_permission in user_permissions
|
||||
|
||||
|
||||
def has_any_permission(
|
||||
user_permissions: list[str], required_permissions: list[str]
|
||||
) -> bool:
|
||||
"""
|
||||
Check if the user has any of the required permissions.
|
||||
|
||||
Args:
|
||||
user_permissions: List of permission strings the user has
|
||||
required_permissions: List of permission strings to check for (any match)
|
||||
|
||||
Returns:
|
||||
True if user has at least one of the permissions, False otherwise
|
||||
"""
|
||||
if not user_permissions:
|
||||
return False
|
||||
|
||||
if Permission.FULL_ACCESS.value in user_permissions:
|
||||
return True
|
||||
|
||||
return any(perm in user_permissions for perm in required_permissions)
|
||||
|
||||
|
||||
def has_all_permissions(
|
||||
user_permissions: list[str], required_permissions: list[str]
|
||||
) -> bool:
|
||||
"""
|
||||
Check if the user has all of the required permissions.
|
||||
|
||||
Args:
|
||||
user_permissions: List of permission strings the user has
|
||||
required_permissions: List of permission strings to check for (all must match)
|
||||
|
||||
Returns:
|
||||
True if user has all of the permissions, False otherwise
|
||||
"""
|
||||
if not user_permissions:
|
||||
return False
|
||||
|
||||
if Permission.FULL_ACCESS.value in user_permissions:
|
||||
return True
|
||||
|
||||
return all(perm in user_permissions for perm in required_permissions)
|
||||
|
||||
|
||||
def get_default_roles_config() -> list[dict]:
|
||||
"""
|
||||
Get the configuration for default system roles.
|
||||
These roles are created automatically when a search space is created.
|
||||
|
||||
Returns:
|
||||
List of role configurations with name, description, permissions, and flags
|
||||
"""
|
||||
return [
|
||||
{
|
||||
"name": "Owner",
|
||||
"description": "Full access to all search space resources and settings",
|
||||
"permissions": DEFAULT_ROLE_PERMISSIONS["Owner"],
|
||||
"is_default": False,
|
||||
"is_system_role": True,
|
||||
},
|
||||
{
|
||||
"name": "Admin",
|
||||
"description": "Can manage most resources except deleting the search space",
|
||||
"permissions": DEFAULT_ROLE_PERMISSIONS["Admin"],
|
||||
"is_default": False,
|
||||
"is_system_role": True,
|
||||
},
|
||||
{
|
||||
"name": "Editor",
|
||||
"description": "Can create and edit documents, chats, and podcasts",
|
||||
"permissions": DEFAULT_ROLE_PERMISSIONS["Editor"],
|
||||
"is_default": True, # Default role for new members via invite
|
||||
"is_system_role": True,
|
||||
},
|
||||
{
|
||||
"name": "Viewer",
|
||||
"description": "Read-only access to search space resources",
|
||||
"permissions": DEFAULT_ROLE_PERMISSIONS["Viewer"],
|
||||
"is_default": False,
|
||||
"is_system_role": True,
|
||||
},
|
||||
]
|
||||
|
|
|
|||
|
|
@ -12,8 +12,7 @@ class ChucksHybridSearchRetriever:
|
|||
self,
|
||||
query_text: str,
|
||||
top_k: int,
|
||||
user_id: str,
|
||||
search_space_id: int | None = None,
|
||||
search_space_id: int,
|
||||
) -> list:
|
||||
"""
|
||||
Perform vector similarity search on chunks.
|
||||
|
|
@ -21,8 +20,7 @@ class ChucksHybridSearchRetriever:
|
|||
Args:
|
||||
query_text: The search query text
|
||||
top_k: Number of results to return
|
||||
user_id: The ID of the user performing the search
|
||||
search_space_id: Optional search space ID to filter results
|
||||
search_space_id: The search space ID to search within
|
||||
|
||||
Returns:
|
||||
List of chunks sorted by vector similarity
|
||||
|
|
@ -31,25 +29,20 @@ class ChucksHybridSearchRetriever:
|
|||
from sqlalchemy.orm import joinedload
|
||||
|
||||
from app.config import config
|
||||
from app.db import Chunk, Document, SearchSpace
|
||||
from app.db import Chunk, Document
|
||||
|
||||
# Get embedding for the query
|
||||
embedding_model = config.embedding_model_instance
|
||||
query_embedding = embedding_model.embed(query_text)
|
||||
|
||||
# Build the base query with user ownership check
|
||||
# Build the query filtered by search space
|
||||
query = (
|
||||
select(Chunk)
|
||||
.options(joinedload(Chunk.document).joinedload(Document.search_space))
|
||||
.join(Document, Chunk.document_id == Document.id)
|
||||
.join(SearchSpace, Document.search_space_id == SearchSpace.id)
|
||||
.where(SearchSpace.user_id == user_id)
|
||||
.where(Document.search_space_id == search_space_id)
|
||||
)
|
||||
|
||||
# Add search space filter if provided
|
||||
if search_space_id is not None:
|
||||
query = query.where(Document.search_space_id == search_space_id)
|
||||
|
||||
# Add vector similarity ordering
|
||||
query = query.order_by(Chunk.embedding.op("<=>")(query_embedding)).limit(top_k)
|
||||
|
||||
|
|
@ -63,8 +56,7 @@ class ChucksHybridSearchRetriever:
|
|||
self,
|
||||
query_text: str,
|
||||
top_k: int,
|
||||
user_id: str,
|
||||
search_space_id: int | None = None,
|
||||
search_space_id: int,
|
||||
) -> list:
|
||||
"""
|
||||
Perform full-text keyword search on chunks.
|
||||
|
|
@ -72,8 +64,7 @@ class ChucksHybridSearchRetriever:
|
|||
Args:
|
||||
query_text: The search query text
|
||||
top_k: Number of results to return
|
||||
user_id: The ID of the user performing the search
|
||||
search_space_id: Optional search space ID to filter results
|
||||
search_space_id: The search space ID to search within
|
||||
|
||||
Returns:
|
||||
List of chunks sorted by text relevance
|
||||
|
|
@ -81,28 +72,23 @@ class ChucksHybridSearchRetriever:
|
|||
from sqlalchemy import func, select
|
||||
from sqlalchemy.orm import joinedload
|
||||
|
||||
from app.db import Chunk, Document, SearchSpace
|
||||
from app.db import Chunk, Document
|
||||
|
||||
# Create tsvector and tsquery for PostgreSQL full-text search
|
||||
tsvector = func.to_tsvector("english", Chunk.content)
|
||||
tsquery = func.plainto_tsquery("english", query_text)
|
||||
|
||||
# Build the base query with user ownership check
|
||||
# Build the query filtered by search space
|
||||
query = (
|
||||
select(Chunk)
|
||||
.options(joinedload(Chunk.document).joinedload(Document.search_space))
|
||||
.join(Document, Chunk.document_id == Document.id)
|
||||
.join(SearchSpace, Document.search_space_id == SearchSpace.id)
|
||||
.where(SearchSpace.user_id == user_id)
|
||||
.where(Document.search_space_id == search_space_id)
|
||||
.where(
|
||||
tsvector.op("@@")(tsquery)
|
||||
) # Only include results that match the query
|
||||
)
|
||||
|
||||
# Add search space filter if provided
|
||||
if search_space_id is not None:
|
||||
query = query.where(Document.search_space_id == search_space_id)
|
||||
|
||||
# Add text search ranking
|
||||
query = query.order_by(func.ts_rank_cd(tsvector, tsquery).desc()).limit(top_k)
|
||||
|
||||
|
|
@ -116,8 +102,7 @@ class ChucksHybridSearchRetriever:
|
|||
self,
|
||||
query_text: str,
|
||||
top_k: int,
|
||||
user_id: str,
|
||||
search_space_id: int | None = None,
|
||||
search_space_id: int,
|
||||
document_type: str | None = None,
|
||||
) -> list:
|
||||
"""
|
||||
|
|
@ -126,8 +111,7 @@ class ChucksHybridSearchRetriever:
|
|||
Args:
|
||||
query_text: The search query text
|
||||
top_k: Number of results to return
|
||||
user_id: The ID of the user performing the search
|
||||
search_space_id: Optional search space ID to filter results
|
||||
search_space_id: The search space ID to search within
|
||||
document_type: Optional document type to filter results (e.g., "FILE", "CRAWLED_URL")
|
||||
|
||||
Returns:
|
||||
|
|
@ -137,7 +121,7 @@ class ChucksHybridSearchRetriever:
|
|||
from sqlalchemy.orm import joinedload
|
||||
|
||||
from app.config import config
|
||||
from app.db import Chunk, Document, DocumentType, SearchSpace
|
||||
from app.db import Chunk, Document, DocumentType
|
||||
|
||||
# Get embedding for the query
|
||||
embedding_model = config.embedding_model_instance
|
||||
|
|
@ -151,12 +135,8 @@ class ChucksHybridSearchRetriever:
|
|||
tsvector = func.to_tsvector("english", Chunk.content)
|
||||
tsquery = func.plainto_tsquery("english", query_text)
|
||||
|
||||
# Base conditions for document filtering
|
||||
base_conditions = [SearchSpace.user_id == user_id]
|
||||
|
||||
# Add search space filter if provided
|
||||
if search_space_id is not None:
|
||||
base_conditions.append(Document.search_space_id == search_space_id)
|
||||
# Base conditions for chunk filtering - search space is required
|
||||
base_conditions = [Document.search_space_id == search_space_id]
|
||||
|
||||
# Add document type filter if provided
|
||||
if document_type is not None:
|
||||
|
|
@ -171,7 +151,7 @@ class ChucksHybridSearchRetriever:
|
|||
else:
|
||||
base_conditions.append(Document.document_type == document_type)
|
||||
|
||||
# CTE for semantic search with user ownership check
|
||||
# CTE for semantic search filtered by search space
|
||||
semantic_search_cte = (
|
||||
select(
|
||||
Chunk.id,
|
||||
|
|
@ -180,7 +160,6 @@ class ChucksHybridSearchRetriever:
|
|||
.label("rank"),
|
||||
)
|
||||
.join(Document, Chunk.document_id == Document.id)
|
||||
.join(SearchSpace, Document.search_space_id == SearchSpace.id)
|
||||
.where(*base_conditions)
|
||||
)
|
||||
|
||||
|
|
@ -190,7 +169,7 @@ class ChucksHybridSearchRetriever:
|
|||
.cte("semantic_search")
|
||||
)
|
||||
|
||||
# CTE for keyword search with user ownership check
|
||||
# CTE for keyword search filtered by search space
|
||||
keyword_search_cte = (
|
||||
select(
|
||||
Chunk.id,
|
||||
|
|
@ -199,7 +178,6 @@ class ChucksHybridSearchRetriever:
|
|||
.label("rank"),
|
||||
)
|
||||
.join(Document, Chunk.document_id == Document.id)
|
||||
.join(SearchSpace, Document.search_space_id == SearchSpace.id)
|
||||
.where(*base_conditions)
|
||||
.where(tsvector.op("@@")(tsquery))
|
||||
)
|
||||
|
|
|
|||
|
|
@ -12,8 +12,7 @@ class DocumentHybridSearchRetriever:
|
|||
self,
|
||||
query_text: str,
|
||||
top_k: int,
|
||||
user_id: str,
|
||||
search_space_id: int | None = None,
|
||||
search_space_id: int,
|
||||
) -> list:
|
||||
"""
|
||||
Perform vector similarity search on documents.
|
||||
|
|
@ -21,8 +20,7 @@ class DocumentHybridSearchRetriever:
|
|||
Args:
|
||||
query_text: The search query text
|
||||
top_k: Number of results to return
|
||||
user_id: The ID of the user performing the search
|
||||
search_space_id: Optional search space ID to filter results
|
||||
search_space_id: The search space ID to search within
|
||||
|
||||
Returns:
|
||||
List of documents sorted by vector similarity
|
||||
|
|
@ -31,24 +29,19 @@ class DocumentHybridSearchRetriever:
|
|||
from sqlalchemy.orm import joinedload
|
||||
|
||||
from app.config import config
|
||||
from app.db import Document, SearchSpace
|
||||
from app.db import Document
|
||||
|
||||
# Get embedding for the query
|
||||
embedding_model = config.embedding_model_instance
|
||||
query_embedding = embedding_model.embed(query_text)
|
||||
|
||||
# Build the base query with user ownership check
|
||||
# Build the query filtered by search space
|
||||
query = (
|
||||
select(Document)
|
||||
.options(joinedload(Document.search_space))
|
||||
.join(SearchSpace, Document.search_space_id == SearchSpace.id)
|
||||
.where(SearchSpace.user_id == user_id)
|
||||
.where(Document.search_space_id == search_space_id)
|
||||
)
|
||||
|
||||
# Add search space filter if provided
|
||||
if search_space_id is not None:
|
||||
query = query.where(Document.search_space_id == search_space_id)
|
||||
|
||||
# Add vector similarity ordering
|
||||
query = query.order_by(Document.embedding.op("<=>")(query_embedding)).limit(
|
||||
top_k
|
||||
|
|
@ -64,8 +57,7 @@ class DocumentHybridSearchRetriever:
|
|||
self,
|
||||
query_text: str,
|
||||
top_k: int,
|
||||
user_id: str,
|
||||
search_space_id: int | None = None,
|
||||
search_space_id: int,
|
||||
) -> list:
|
||||
"""
|
||||
Perform full-text keyword search on documents.
|
||||
|
|
@ -73,8 +65,7 @@ class DocumentHybridSearchRetriever:
|
|||
Args:
|
||||
query_text: The search query text
|
||||
top_k: Number of results to return
|
||||
user_id: The ID of the user performing the search
|
||||
search_space_id: Optional search space ID to filter results
|
||||
search_space_id: The search space ID to search within
|
||||
|
||||
Returns:
|
||||
List of documents sorted by text relevance
|
||||
|
|
@ -82,27 +73,22 @@ class DocumentHybridSearchRetriever:
|
|||
from sqlalchemy import func, select
|
||||
from sqlalchemy.orm import joinedload
|
||||
|
||||
from app.db import Document, SearchSpace
|
||||
from app.db import Document
|
||||
|
||||
# Create tsvector and tsquery for PostgreSQL full-text search
|
||||
tsvector = func.to_tsvector("english", Document.content)
|
||||
tsquery = func.plainto_tsquery("english", query_text)
|
||||
|
||||
# Build the base query with user ownership check
|
||||
# Build the query filtered by search space
|
||||
query = (
|
||||
select(Document)
|
||||
.options(joinedload(Document.search_space))
|
||||
.join(SearchSpace, Document.search_space_id == SearchSpace.id)
|
||||
.where(SearchSpace.user_id == user_id)
|
||||
.where(Document.search_space_id == search_space_id)
|
||||
.where(
|
||||
tsvector.op("@@")(tsquery)
|
||||
) # Only include results that match the query
|
||||
)
|
||||
|
||||
# Add search space filter if provided
|
||||
if search_space_id is not None:
|
||||
query = query.where(Document.search_space_id == search_space_id)
|
||||
|
||||
# Add text search ranking
|
||||
query = query.order_by(func.ts_rank_cd(tsvector, tsquery).desc()).limit(top_k)
|
||||
|
||||
|
|
@ -116,8 +102,7 @@ class DocumentHybridSearchRetriever:
|
|||
self,
|
||||
query_text: str,
|
||||
top_k: int,
|
||||
user_id: str,
|
||||
search_space_id: int | None = None,
|
||||
search_space_id: int,
|
||||
document_type: str | None = None,
|
||||
) -> list:
|
||||
"""
|
||||
|
|
@ -126,8 +111,7 @@ class DocumentHybridSearchRetriever:
|
|||
Args:
|
||||
query_text: The search query text
|
||||
top_k: Number of results to return
|
||||
user_id: The ID of the user performing the search
|
||||
search_space_id: Optional search space ID to filter results
|
||||
search_space_id: The search space ID to search within
|
||||
document_type: Optional document type to filter results (e.g., "FILE", "CRAWLED_URL")
|
||||
|
||||
"""
|
||||
|
|
@ -135,7 +119,7 @@ class DocumentHybridSearchRetriever:
|
|||
from sqlalchemy.orm import joinedload
|
||||
|
||||
from app.config import config
|
||||
from app.db import Document, DocumentType, SearchSpace
|
||||
from app.db import Document, DocumentType
|
||||
|
||||
# Get embedding for the query
|
||||
embedding_model = config.embedding_model_instance
|
||||
|
|
@ -149,12 +133,8 @@ class DocumentHybridSearchRetriever:
|
|||
tsvector = func.to_tsvector("english", Document.content)
|
||||
tsquery = func.plainto_tsquery("english", query_text)
|
||||
|
||||
# Base conditions for document filtering
|
||||
base_conditions = [SearchSpace.user_id == user_id]
|
||||
|
||||
# Add search space filter if provided
|
||||
if search_space_id is not None:
|
||||
base_conditions.append(Document.search_space_id == search_space_id)
|
||||
# Base conditions for document filtering - search space is required
|
||||
base_conditions = [Document.search_space_id == search_space_id]
|
||||
|
||||
# Add document type filter if provided
|
||||
if document_type is not None:
|
||||
|
|
@ -169,17 +149,13 @@ class DocumentHybridSearchRetriever:
|
|||
else:
|
||||
base_conditions.append(Document.document_type == document_type)
|
||||
|
||||
# CTE for semantic search with user ownership check
|
||||
semantic_search_cte = (
|
||||
select(
|
||||
Document.id,
|
||||
func.rank()
|
||||
.over(order_by=Document.embedding.op("<=>")(query_embedding))
|
||||
.label("rank"),
|
||||
)
|
||||
.join(SearchSpace, Document.search_space_id == SearchSpace.id)
|
||||
.where(*base_conditions)
|
||||
)
|
||||
# CTE for semantic search filtered by search space
|
||||
semantic_search_cte = select(
|
||||
Document.id,
|
||||
func.rank()
|
||||
.over(order_by=Document.embedding.op("<=>")(query_embedding))
|
||||
.label("rank"),
|
||||
).where(*base_conditions)
|
||||
|
||||
semantic_search_cte = (
|
||||
semantic_search_cte.order_by(Document.embedding.op("<=>")(query_embedding))
|
||||
|
|
@ -187,7 +163,7 @@ class DocumentHybridSearchRetriever:
|
|||
.cte("semantic_search")
|
||||
)
|
||||
|
||||
# CTE for keyword search with user ownership check
|
||||
# CTE for keyword search filtered by search space
|
||||
keyword_search_cte = (
|
||||
select(
|
||||
Document.id,
|
||||
|
|
@ -195,7 +171,6 @@ class DocumentHybridSearchRetriever:
|
|||
.over(order_by=func.ts_rank_cd(tsvector, tsquery).desc())
|
||||
.label("rank"),
|
||||
)
|
||||
.join(SearchSpace, Document.search_space_id == SearchSpace.id)
|
||||
.where(*base_conditions)
|
||||
.where(tsvector.op("@@")(tsquery))
|
||||
)
|
||||
|
|
|
|||
|
|
@ -16,13 +16,14 @@ from .llm_config_routes import router as llm_config_router
|
|||
from .logs_routes import router as logs_router
|
||||
from .luma_add_connector_route import router as luma_add_connector_router
|
||||
from .podcasts_routes import router as podcasts_router
|
||||
from .rbac_routes import router as rbac_router
|
||||
from .search_source_connectors_routes import router as search_source_connectors_router
|
||||
from .search_spaces_routes import router as search_spaces_router
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
router.include_router(search_spaces_router)
|
||||
router.include_router(editor_router)
|
||||
router.include_router(rbac_router) # RBAC routes for roles, members, invites
|
||||
router.include_router(documents_router)
|
||||
router.include_router(podcasts_router)
|
||||
router.include_router(chats_router)
|
||||
|
|
|
|||
|
|
@ -6,7 +6,14 @@ from sqlalchemy.ext.asyncio import AsyncSession
|
|||
from sqlalchemy.future import select
|
||||
from sqlalchemy.orm import selectinload
|
||||
|
||||
from app.db import Chat, SearchSpace, User, UserSearchSpacePreference, get_async_session
|
||||
from app.db import (
|
||||
Chat,
|
||||
Permission,
|
||||
SearchSpace,
|
||||
SearchSpaceMembership,
|
||||
User,
|
||||
get_async_session,
|
||||
)
|
||||
from app.schemas import (
|
||||
AISDKChatRequest,
|
||||
ChatCreate,
|
||||
|
|
@ -16,7 +23,7 @@ from app.schemas import (
|
|||
)
|
||||
from app.tasks.stream_connector_search_results import stream_connector_search_results
|
||||
from app.users import current_active_user
|
||||
from app.utils.check_ownership import check_ownership
|
||||
from app.utils.rbac import check_permission
|
||||
from app.utils.validators import (
|
||||
validate_connectors,
|
||||
validate_document_ids,
|
||||
|
|
@ -59,45 +66,38 @@ async def handle_chat_data(
|
|||
# print("RESQUEST DATA:", request_data)
|
||||
# print("SELECTED CONNECTORS:", selected_connectors)
|
||||
|
||||
# Check if the search space belongs to the current user
|
||||
# Check if the user has chat access to the search space
|
||||
try:
|
||||
await check_ownership(session, SearchSpace, search_space_id, user)
|
||||
language_result = await session.execute(
|
||||
select(UserSearchSpacePreference)
|
||||
.options(
|
||||
selectinload(UserSearchSpacePreference.search_space).selectinload(
|
||||
SearchSpace.llm_configs
|
||||
),
|
||||
# Note: Removed selectinload for LLM relationships as they no longer exist
|
||||
# Global configs (negative IDs) don't have foreign keys
|
||||
# LLM configs are now fetched manually when needed
|
||||
)
|
||||
.filter(
|
||||
UserSearchSpacePreference.search_space_id == search_space_id,
|
||||
UserSearchSpacePreference.user_id == user.id,
|
||||
)
|
||||
await check_permission(
|
||||
session,
|
||||
user,
|
||||
search_space_id,
|
||||
Permission.CHATS_CREATE.value,
|
||||
"You don't have permission to use chat in this search space",
|
||||
)
|
||||
user_preference = language_result.scalars().first()
|
||||
# print("UserSearchSpacePreference:", user_preference)
|
||||
|
||||
# Get search space with LLM configs (preferences are now stored at search space level)
|
||||
search_space_result = await session.execute(
|
||||
select(SearchSpace)
|
||||
.options(selectinload(SearchSpace.llm_configs))
|
||||
.filter(SearchSpace.id == search_space_id)
|
||||
)
|
||||
search_space = search_space_result.scalars().first()
|
||||
|
||||
language = None
|
||||
llm_configs = [] # Initialize to empty list
|
||||
|
||||
if (
|
||||
user_preference
|
||||
and user_preference.search_space
|
||||
and user_preference.search_space.llm_configs
|
||||
):
|
||||
llm_configs = user_preference.search_space.llm_configs
|
||||
if search_space and search_space.llm_configs:
|
||||
llm_configs = search_space.llm_configs
|
||||
|
||||
# Manually fetch LLM configs since relationships no longer exist
|
||||
# Check fast_llm, long_context_llm, and strategic_llm IDs
|
||||
# Get language from configured LLM preferences
|
||||
# LLM preferences are now stored on the SearchSpace model
|
||||
from app.config import config as app_config
|
||||
|
||||
for llm_id in [
|
||||
user_preference.fast_llm_id,
|
||||
user_preference.long_context_llm_id,
|
||||
user_preference.strategic_llm_id,
|
||||
search_space.fast_llm_id,
|
||||
search_space.long_context_llm_id,
|
||||
search_space.strategic_llm_id,
|
||||
]:
|
||||
if llm_id is not None:
|
||||
# Check if it's a global config (negative ID)
|
||||
|
|
@ -161,8 +161,18 @@ async def create_chat(
|
|||
session: AsyncSession = Depends(get_async_session),
|
||||
user: User = Depends(current_active_user),
|
||||
):
|
||||
"""
|
||||
Create a new chat.
|
||||
Requires CHATS_CREATE permission.
|
||||
"""
|
||||
try:
|
||||
await check_ownership(session, SearchSpace, chat.search_space_id, user)
|
||||
await check_permission(
|
||||
session,
|
||||
user,
|
||||
chat.search_space_id,
|
||||
Permission.CHATS_CREATE.value,
|
||||
"You don't have permission to create chats in this search space",
|
||||
)
|
||||
db_chat = Chat(**chat.model_dump())
|
||||
session.add(db_chat)
|
||||
await session.commit()
|
||||
|
|
@ -197,6 +207,10 @@ async def read_chats(
|
|||
session: AsyncSession = Depends(get_async_session),
|
||||
user: User = Depends(current_active_user),
|
||||
):
|
||||
"""
|
||||
List chats the user has access to.
|
||||
Requires CHATS_READ permission for the search space(s).
|
||||
"""
|
||||
# Validate pagination parameters
|
||||
if skip < 0:
|
||||
raise HTTPException(
|
||||
|
|
@ -212,9 +226,17 @@ async def read_chats(
|
|||
status_code=400, detail="search_space_id must be a positive integer"
|
||||
)
|
||||
try:
|
||||
# Select specific fields excluding messages
|
||||
query = (
|
||||
select(
|
||||
if search_space_id is not None:
|
||||
# Check permission for specific search space
|
||||
await check_permission(
|
||||
session,
|
||||
user,
|
||||
search_space_id,
|
||||
Permission.CHATS_READ.value,
|
||||
"You don't have permission to read chats in this search space",
|
||||
)
|
||||
# Select specific fields excluding messages
|
||||
query = select(
|
||||
Chat.id,
|
||||
Chat.type,
|
||||
Chat.title,
|
||||
|
|
@ -222,17 +244,28 @@ async def read_chats(
|
|||
Chat.search_space_id,
|
||||
Chat.created_at,
|
||||
Chat.state_version,
|
||||
).filter(Chat.search_space_id == search_space_id)
|
||||
else:
|
||||
# Get chats from all search spaces user has membership in
|
||||
query = (
|
||||
select(
|
||||
Chat.id,
|
||||
Chat.type,
|
||||
Chat.title,
|
||||
Chat.initial_connectors,
|
||||
Chat.search_space_id,
|
||||
Chat.created_at,
|
||||
Chat.state_version,
|
||||
)
|
||||
.join(SearchSpace)
|
||||
.join(SearchSpaceMembership)
|
||||
.filter(SearchSpaceMembership.user_id == user.id)
|
||||
)
|
||||
.join(SearchSpace)
|
||||
.filter(SearchSpace.user_id == user.id)
|
||||
)
|
||||
|
||||
# Filter by search_space_id if provided
|
||||
if search_space_id is not None:
|
||||
query = query.filter(Chat.search_space_id == search_space_id)
|
||||
|
||||
result = await session.execute(query.offset(skip).limit(limit))
|
||||
return result.all()
|
||||
except HTTPException:
|
||||
raise
|
||||
except OperationalError:
|
||||
raise HTTPException(
|
||||
status_code=503, detail="Database operation failed. Please try again later."
|
||||
|
|
@ -249,19 +282,32 @@ async def read_chat(
|
|||
session: AsyncSession = Depends(get_async_session),
|
||||
user: User = Depends(current_active_user),
|
||||
):
|
||||
"""
|
||||
Get a specific chat by ID.
|
||||
Requires CHATS_READ permission for the search space.
|
||||
"""
|
||||
try:
|
||||
result = await session.execute(
|
||||
select(Chat)
|
||||
.join(SearchSpace)
|
||||
.filter(Chat.id == chat_id, SearchSpace.user_id == user.id)
|
||||
)
|
||||
result = await session.execute(select(Chat).filter(Chat.id == chat_id))
|
||||
chat = result.scalars().first()
|
||||
|
||||
if not chat:
|
||||
raise HTTPException(
|
||||
status_code=404,
|
||||
detail="Chat not found or you don't have permission to access it",
|
||||
detail="Chat not found",
|
||||
)
|
||||
|
||||
# Check permission for the search space
|
||||
await check_permission(
|
||||
session,
|
||||
user,
|
||||
chat.search_space_id,
|
||||
Permission.CHATS_READ.value,
|
||||
"You don't have permission to read chats in this search space",
|
||||
)
|
||||
|
||||
return chat
|
||||
except HTTPException:
|
||||
raise
|
||||
except OperationalError:
|
||||
raise HTTPException(
|
||||
status_code=503, detail="Database operation failed. Please try again later."
|
||||
|
|
@ -280,8 +326,26 @@ async def update_chat(
|
|||
session: AsyncSession = Depends(get_async_session),
|
||||
user: User = Depends(current_active_user),
|
||||
):
|
||||
"""
|
||||
Update a chat.
|
||||
Requires CHATS_UPDATE permission for the search space.
|
||||
"""
|
||||
try:
|
||||
db_chat = await read_chat(chat_id, session, user)
|
||||
result = await session.execute(select(Chat).filter(Chat.id == chat_id))
|
||||
db_chat = result.scalars().first()
|
||||
|
||||
if not db_chat:
|
||||
raise HTTPException(status_code=404, detail="Chat not found")
|
||||
|
||||
# Check permission for the search space
|
||||
await check_permission(
|
||||
session,
|
||||
user,
|
||||
db_chat.search_space_id,
|
||||
Permission.CHATS_UPDATE.value,
|
||||
"You don't have permission to update chats in this search space",
|
||||
)
|
||||
|
||||
update_data = chat_update.model_dump(exclude_unset=True)
|
||||
for key, value in update_data.items():
|
||||
if key == "messages":
|
||||
|
|
@ -318,8 +382,26 @@ async def delete_chat(
|
|||
session: AsyncSession = Depends(get_async_session),
|
||||
user: User = Depends(current_active_user),
|
||||
):
|
||||
"""
|
||||
Delete a chat.
|
||||
Requires CHATS_DELETE permission for the search space.
|
||||
"""
|
||||
try:
|
||||
db_chat = await read_chat(chat_id, session, user)
|
||||
result = await session.execute(select(Chat).filter(Chat.id == chat_id))
|
||||
db_chat = result.scalars().first()
|
||||
|
||||
if not db_chat:
|
||||
raise HTTPException(status_code=404, detail="Chat not found")
|
||||
|
||||
# Check permission for the search space
|
||||
await check_permission(
|
||||
session,
|
||||
user,
|
||||
db_chat.search_space_id,
|
||||
Permission.CHATS_DELETE.value,
|
||||
"You don't have permission to delete chats in this search space",
|
||||
)
|
||||
|
||||
await session.delete(db_chat)
|
||||
await session.commit()
|
||||
return {"message": "Chat deleted successfully"}
|
||||
|
|
|
|||
|
|
@ -10,7 +10,9 @@ from app.db import (
|
|||
Chunk,
|
||||
Document,
|
||||
DocumentType,
|
||||
Permission,
|
||||
SearchSpace,
|
||||
SearchSpaceMembership,
|
||||
User,
|
||||
get_async_session,
|
||||
)
|
||||
|
|
@ -22,7 +24,7 @@ from app.schemas import (
|
|||
PaginatedResponse,
|
||||
)
|
||||
from app.users import current_active_user
|
||||
from app.utils.check_ownership import check_ownership
|
||||
from app.utils.rbac import check_permission
|
||||
|
||||
try:
|
||||
asyncio.set_event_loop_policy(asyncio.DefaultEventLoopPolicy())
|
||||
|
|
@ -44,9 +46,19 @@ async def create_documents(
|
|||
session: AsyncSession = Depends(get_async_session),
|
||||
user: User = Depends(current_active_user),
|
||||
):
|
||||
"""
|
||||
Create new documents.
|
||||
Requires DOCUMENTS_CREATE permission.
|
||||
"""
|
||||
try:
|
||||
# Check if the user owns the search space
|
||||
await check_ownership(session, SearchSpace, request.search_space_id, user)
|
||||
# Check permission
|
||||
await check_permission(
|
||||
session,
|
||||
user,
|
||||
request.search_space_id,
|
||||
Permission.DOCUMENTS_CREATE.value,
|
||||
"You don't have permission to create documents in this search space",
|
||||
)
|
||||
|
||||
if request.document_type == DocumentType.EXTENSION:
|
||||
from app.tasks.celery_tasks.document_tasks import (
|
||||
|
|
@ -65,13 +77,6 @@ async def create_documents(
|
|||
process_extension_document_task.delay(
|
||||
document_dict, request.search_space_id, str(user.id)
|
||||
)
|
||||
elif request.document_type == DocumentType.CRAWLED_URL:
|
||||
from app.tasks.celery_tasks.document_tasks import process_crawled_url_task
|
||||
|
||||
for url in request.content:
|
||||
process_crawled_url_task.delay(
|
||||
url, request.search_space_id, str(user.id)
|
||||
)
|
||||
elif request.document_type == DocumentType.YOUTUBE_VIDEO:
|
||||
from app.tasks.celery_tasks.document_tasks import process_youtube_video_task
|
||||
|
||||
|
|
@ -100,8 +105,19 @@ async def create_documents_file_upload(
|
|||
session: AsyncSession = Depends(get_async_session),
|
||||
user: User = Depends(current_active_user),
|
||||
):
|
||||
"""
|
||||
Upload files as documents.
|
||||
Requires DOCUMENTS_CREATE permission.
|
||||
"""
|
||||
try:
|
||||
await check_ownership(session, SearchSpace, search_space_id, user)
|
||||
# Check permission
|
||||
await check_permission(
|
||||
session,
|
||||
user,
|
||||
search_space_id,
|
||||
Permission.DOCUMENTS_CREATE.value,
|
||||
"You don't have permission to create documents in this search space",
|
||||
)
|
||||
|
||||
if not files:
|
||||
raise HTTPException(status_code=400, detail="No files provided")
|
||||
|
|
@ -158,7 +174,8 @@ async def read_documents(
|
|||
user: User = Depends(current_active_user),
|
||||
):
|
||||
"""
|
||||
List documents owned by the current user, with optional filtering and pagination.
|
||||
List documents the user has access to, with optional filtering and pagination.
|
||||
Requires DOCUMENTS_READ permission for the search space(s).
|
||||
|
||||
Args:
|
||||
skip: Absolute number of items to skip from the beginning. If provided, it takes precedence over 'page'.
|
||||
|
|
@ -174,40 +191,49 @@ async def read_documents(
|
|||
|
||||
Notes:
|
||||
- If both 'skip' and 'page' are provided, 'skip' is used.
|
||||
- Results are scoped to documents owned by the current user.
|
||||
- Results are scoped to documents in search spaces the user has membership in.
|
||||
"""
|
||||
try:
|
||||
from sqlalchemy import func
|
||||
|
||||
query = (
|
||||
select(Document).join(SearchSpace).filter(SearchSpace.user_id == user.id)
|
||||
)
|
||||
|
||||
# Filter by search_space_id if provided
|
||||
# If specific search_space_id, check permission
|
||||
if search_space_id is not None:
|
||||
query = query.filter(Document.search_space_id == search_space_id)
|
||||
await check_permission(
|
||||
session,
|
||||
user,
|
||||
search_space_id,
|
||||
Permission.DOCUMENTS_READ.value,
|
||||
"You don't have permission to read documents in this search space",
|
||||
)
|
||||
query = select(Document).filter(Document.search_space_id == search_space_id)
|
||||
count_query = (
|
||||
select(func.count())
|
||||
.select_from(Document)
|
||||
.filter(Document.search_space_id == search_space_id)
|
||||
)
|
||||
else:
|
||||
# Get documents from all search spaces user has membership in
|
||||
query = (
|
||||
select(Document)
|
||||
.join(SearchSpace)
|
||||
.join(SearchSpaceMembership)
|
||||
.filter(SearchSpaceMembership.user_id == user.id)
|
||||
)
|
||||
count_query = (
|
||||
select(func.count())
|
||||
.select_from(Document)
|
||||
.join(SearchSpace)
|
||||
.join(SearchSpaceMembership)
|
||||
.filter(SearchSpaceMembership.user_id == user.id)
|
||||
)
|
||||
|
||||
# Filter by document_types if provided
|
||||
if document_types is not None and document_types.strip():
|
||||
type_list = [t.strip() for t in document_types.split(",") if t.strip()]
|
||||
if type_list:
|
||||
query = query.filter(Document.document_type.in_(type_list))
|
||||
|
||||
# Get total count
|
||||
count_query = (
|
||||
select(func.count())
|
||||
.select_from(Document)
|
||||
.join(SearchSpace)
|
||||
.filter(SearchSpace.user_id == user.id)
|
||||
)
|
||||
if search_space_id is not None:
|
||||
count_query = count_query.filter(
|
||||
Document.search_space_id == search_space_id
|
||||
)
|
||||
if document_types is not None and document_types.strip():
|
||||
type_list = [t.strip() for t in document_types.split(",") if t.strip()]
|
||||
if type_list:
|
||||
count_query = count_query.filter(Document.document_type.in_(type_list))
|
||||
|
||||
total_result = await session.execute(count_query)
|
||||
total = total_result.scalar() or 0
|
||||
|
||||
|
|
@ -242,6 +268,8 @@ async def read_documents(
|
|||
)
|
||||
|
||||
return PaginatedResponse(items=api_documents, total=total)
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=500, detail=f"Failed to fetch documents: {e!s}"
|
||||
|
|
@ -261,6 +289,7 @@ async def search_documents(
|
|||
):
|
||||
"""
|
||||
Search documents by title substring, optionally filtered by search_space_id and document_types.
|
||||
Requires DOCUMENTS_READ permission for the search space(s).
|
||||
|
||||
Args:
|
||||
title: Case-insensitive substring to match against document titles. Required.
|
||||
|
|
@ -282,37 +311,48 @@ async def search_documents(
|
|||
try:
|
||||
from sqlalchemy import func
|
||||
|
||||
query = (
|
||||
select(Document).join(SearchSpace).filter(SearchSpace.user_id == user.id)
|
||||
)
|
||||
# If specific search_space_id, check permission
|
||||
if search_space_id is not None:
|
||||
query = query.filter(Document.search_space_id == search_space_id)
|
||||
await check_permission(
|
||||
session,
|
||||
user,
|
||||
search_space_id,
|
||||
Permission.DOCUMENTS_READ.value,
|
||||
"You don't have permission to read documents in this search space",
|
||||
)
|
||||
query = select(Document).filter(Document.search_space_id == search_space_id)
|
||||
count_query = (
|
||||
select(func.count())
|
||||
.select_from(Document)
|
||||
.filter(Document.search_space_id == search_space_id)
|
||||
)
|
||||
else:
|
||||
# Get documents from all search spaces user has membership in
|
||||
query = (
|
||||
select(Document)
|
||||
.join(SearchSpace)
|
||||
.join(SearchSpaceMembership)
|
||||
.filter(SearchSpaceMembership.user_id == user.id)
|
||||
)
|
||||
count_query = (
|
||||
select(func.count())
|
||||
.select_from(Document)
|
||||
.join(SearchSpace)
|
||||
.join(SearchSpaceMembership)
|
||||
.filter(SearchSpaceMembership.user_id == user.id)
|
||||
)
|
||||
|
||||
# Only search by title (case-insensitive)
|
||||
query = query.filter(Document.title.ilike(f"%{title}%"))
|
||||
count_query = count_query.filter(Document.title.ilike(f"%{title}%"))
|
||||
|
||||
# Filter by document_types if provided
|
||||
if document_types is not None and document_types.strip():
|
||||
type_list = [t.strip() for t in document_types.split(",") if t.strip()]
|
||||
if type_list:
|
||||
query = query.filter(Document.document_type.in_(type_list))
|
||||
|
||||
# Get total count
|
||||
count_query = (
|
||||
select(func.count())
|
||||
.select_from(Document)
|
||||
.join(SearchSpace)
|
||||
.filter(SearchSpace.user_id == user.id)
|
||||
)
|
||||
if search_space_id is not None:
|
||||
count_query = count_query.filter(
|
||||
Document.search_space_id == search_space_id
|
||||
)
|
||||
count_query = count_query.filter(Document.title.ilike(f"%{title}%"))
|
||||
if document_types is not None and document_types.strip():
|
||||
type_list = [t.strip() for t in document_types.split(",") if t.strip()]
|
||||
if type_list:
|
||||
count_query = count_query.filter(Document.document_type.in_(type_list))
|
||||
|
||||
total_result = await session.execute(count_query)
|
||||
total = total_result.scalar() or 0
|
||||
|
||||
|
|
@ -347,6 +387,8 @@ async def search_documents(
|
|||
)
|
||||
|
||||
return PaginatedResponse(items=api_documents, total=total)
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=500, detail=f"Failed to search documents: {e!s}"
|
||||
|
|
@ -360,7 +402,8 @@ async def get_document_type_counts(
|
|||
user: User = Depends(current_active_user),
|
||||
):
|
||||
"""
|
||||
Get counts of documents by type for the current user.
|
||||
Get counts of documents by type for search spaces the user has access to.
|
||||
Requires DOCUMENTS_READ permission for the search space(s).
|
||||
|
||||
Args:
|
||||
search_space_id: If provided, restrict counts to a specific search space.
|
||||
|
|
@ -373,20 +416,36 @@ async def get_document_type_counts(
|
|||
try:
|
||||
from sqlalchemy import func
|
||||
|
||||
query = (
|
||||
select(Document.document_type, func.count(Document.id))
|
||||
.join(SearchSpace)
|
||||
.filter(SearchSpace.user_id == user.id)
|
||||
.group_by(Document.document_type)
|
||||
)
|
||||
|
||||
if search_space_id is not None:
|
||||
query = query.filter(Document.search_space_id == search_space_id)
|
||||
# Check permission for specific search space
|
||||
await check_permission(
|
||||
session,
|
||||
user,
|
||||
search_space_id,
|
||||
Permission.DOCUMENTS_READ.value,
|
||||
"You don't have permission to read documents in this search space",
|
||||
)
|
||||
query = (
|
||||
select(Document.document_type, func.count(Document.id))
|
||||
.filter(Document.search_space_id == search_space_id)
|
||||
.group_by(Document.document_type)
|
||||
)
|
||||
else:
|
||||
# Get counts from all search spaces user has membership in
|
||||
query = (
|
||||
select(Document.document_type, func.count(Document.id))
|
||||
.join(SearchSpace)
|
||||
.join(SearchSpaceMembership)
|
||||
.filter(SearchSpaceMembership.user_id == user.id)
|
||||
.group_by(Document.document_type)
|
||||
)
|
||||
|
||||
result = await session.execute(query)
|
||||
type_counts = dict(result.all())
|
||||
|
||||
return type_counts
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=500, detail=f"Failed to fetch document type counts: {e!s}"
|
||||
|
|
@ -401,6 +460,7 @@ async def get_document_by_chunk_id(
|
|||
):
|
||||
"""
|
||||
Retrieves a document based on a chunk ID, including all its chunks ordered by creation time.
|
||||
Requires DOCUMENTS_READ permission for the search space.
|
||||
The document's embedding and chunk embeddings are excluded from the response.
|
||||
"""
|
||||
try:
|
||||
|
|
@ -413,21 +473,29 @@ async def get_document_by_chunk_id(
|
|||
status_code=404, detail=f"Chunk with id {chunk_id} not found"
|
||||
)
|
||||
|
||||
# Get the associated document and verify ownership
|
||||
# Get the associated document
|
||||
document_result = await session.execute(
|
||||
select(Document)
|
||||
.options(selectinload(Document.chunks))
|
||||
.join(SearchSpace)
|
||||
.filter(Document.id == chunk.document_id, SearchSpace.user_id == user.id)
|
||||
.filter(Document.id == chunk.document_id)
|
||||
)
|
||||
document = document_result.scalars().first()
|
||||
|
||||
if not document:
|
||||
raise HTTPException(
|
||||
status_code=404,
|
||||
detail="Document not found or you don't have access to it",
|
||||
detail="Document not found",
|
||||
)
|
||||
|
||||
# Check permission for the search space
|
||||
await check_permission(
|
||||
session,
|
||||
user,
|
||||
document.search_space_id,
|
||||
Permission.DOCUMENTS_READ.value,
|
||||
"You don't have permission to read documents in this search space",
|
||||
)
|
||||
|
||||
# Sort chunks by creation time
|
||||
sorted_chunks = sorted(document.chunks, key=lambda x: x.created_at)
|
||||
|
||||
|
|
@ -456,11 +524,13 @@ async def read_document(
|
|||
session: AsyncSession = Depends(get_async_session),
|
||||
user: User = Depends(current_active_user),
|
||||
):
|
||||
"""
|
||||
Get a specific document by ID.
|
||||
Requires DOCUMENTS_READ permission for the search space.
|
||||
"""
|
||||
try:
|
||||
result = await session.execute(
|
||||
select(Document)
|
||||
.join(SearchSpace)
|
||||
.filter(Document.id == document_id, SearchSpace.user_id == user.id)
|
||||
select(Document).filter(Document.id == document_id)
|
||||
)
|
||||
document = result.scalars().first()
|
||||
|
||||
|
|
@ -469,6 +539,15 @@ async def read_document(
|
|||
status_code=404, detail=f"Document with id {document_id} not found"
|
||||
)
|
||||
|
||||
# Check permission for the search space
|
||||
await check_permission(
|
||||
session,
|
||||
user,
|
||||
document.search_space_id,
|
||||
Permission.DOCUMENTS_READ.value,
|
||||
"You don't have permission to read documents in this search space",
|
||||
)
|
||||
|
||||
# Convert database object to API-friendly format
|
||||
return DocumentRead(
|
||||
id=document.id,
|
||||
|
|
@ -479,6 +558,8 @@ async def read_document(
|
|||
created_at=document.created_at,
|
||||
search_space_id=document.search_space_id,
|
||||
)
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=500, detail=f"Failed to fetch document: {e!s}"
|
||||
|
|
@ -492,12 +573,13 @@ async def update_document(
|
|||
session: AsyncSession = Depends(get_async_session),
|
||||
user: User = Depends(current_active_user),
|
||||
):
|
||||
"""
|
||||
Update a document.
|
||||
Requires DOCUMENTS_UPDATE permission for the search space.
|
||||
"""
|
||||
try:
|
||||
# Query the document directly instead of using read_document function
|
||||
result = await session.execute(
|
||||
select(Document)
|
||||
.join(SearchSpace)
|
||||
.filter(Document.id == document_id, SearchSpace.user_id == user.id)
|
||||
select(Document).filter(Document.id == document_id)
|
||||
)
|
||||
db_document = result.scalars().first()
|
||||
|
||||
|
|
@ -506,6 +588,15 @@ async def update_document(
|
|||
status_code=404, detail=f"Document with id {document_id} not found"
|
||||
)
|
||||
|
||||
# Check permission for the search space
|
||||
await check_permission(
|
||||
session,
|
||||
user,
|
||||
db_document.search_space_id,
|
||||
Permission.DOCUMENTS_UPDATE.value,
|
||||
"You don't have permission to update documents in this search space",
|
||||
)
|
||||
|
||||
update_data = document_update.model_dump(exclude_unset=True)
|
||||
for key, value in update_data.items():
|
||||
setattr(db_document, key, value)
|
||||
|
|
@ -537,12 +628,13 @@ async def delete_document(
|
|||
session: AsyncSession = Depends(get_async_session),
|
||||
user: User = Depends(current_active_user),
|
||||
):
|
||||
"""
|
||||
Delete a document.
|
||||
Requires DOCUMENTS_DELETE permission for the search space.
|
||||
"""
|
||||
try:
|
||||
# Query the document directly instead of using read_document function
|
||||
result = await session.execute(
|
||||
select(Document)
|
||||
.join(SearchSpace)
|
||||
.filter(Document.id == document_id, SearchSpace.user_id == user.id)
|
||||
select(Document).filter(Document.id == document_id)
|
||||
)
|
||||
document = result.scalars().first()
|
||||
|
||||
|
|
@ -551,6 +643,15 @@ async def delete_document(
|
|||
status_code=404, detail=f"Document with id {document_id} not found"
|
||||
)
|
||||
|
||||
# Check permission for the search space
|
||||
await check_permission(
|
||||
session,
|
||||
user,
|
||||
document.search_space_id,
|
||||
Permission.DOCUMENTS_DELETE.value,
|
||||
"You don't have permission to delete documents in this search space",
|
||||
)
|
||||
|
||||
await session.delete(document)
|
||||
await session.commit()
|
||||
return {"message": "Document deleted successfully"}
|
||||
|
|
|
|||
|
|
@ -8,67 +8,22 @@ from sqlalchemy.future import select
|
|||
from app.config import config
|
||||
from app.db import (
|
||||
LLMConfig,
|
||||
Permission,
|
||||
SearchSpace,
|
||||
User,
|
||||
UserSearchSpacePreference,
|
||||
get_async_session,
|
||||
)
|
||||
from app.schemas import LLMConfigCreate, LLMConfigRead, LLMConfigUpdate
|
||||
from app.services.llm_service import validate_llm_config
|
||||
from app.users import current_active_user
|
||||
from app.utils.rbac import check_permission
|
||||
|
||||
router = APIRouter()
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Helper function to check search space access
|
||||
async def check_search_space_access(
|
||||
session: AsyncSession, search_space_id: int, user: User
|
||||
) -> SearchSpace:
|
||||
"""Verify that the user has access to the search space"""
|
||||
result = await session.execute(
|
||||
select(SearchSpace).filter(
|
||||
SearchSpace.id == search_space_id, SearchSpace.user_id == user.id
|
||||
)
|
||||
)
|
||||
search_space = result.scalars().first()
|
||||
if not search_space:
|
||||
raise HTTPException(
|
||||
status_code=404,
|
||||
detail="Search space not found or you don't have permission to access it",
|
||||
)
|
||||
return search_space
|
||||
|
||||
|
||||
# Helper function to get or create user search space preference
|
||||
async def get_or_create_user_preference(
|
||||
session: AsyncSession, user_id, search_space_id: int
|
||||
) -> UserSearchSpacePreference:
|
||||
"""Get or create user preference for a search space"""
|
||||
result = await session.execute(
|
||||
select(UserSearchSpacePreference).filter(
|
||||
UserSearchSpacePreference.user_id == user_id,
|
||||
UserSearchSpacePreference.search_space_id == search_space_id,
|
||||
)
|
||||
# Removed selectinload options since relationships no longer exist
|
||||
)
|
||||
preference = result.scalars().first()
|
||||
|
||||
if not preference:
|
||||
# Create new preference entry
|
||||
preference = UserSearchSpacePreference(
|
||||
user_id=user_id,
|
||||
search_space_id=search_space_id,
|
||||
)
|
||||
session.add(preference)
|
||||
await session.commit()
|
||||
await session.refresh(preference)
|
||||
|
||||
return preference
|
||||
|
||||
|
||||
class LLMPreferencesUpdate(BaseModel):
|
||||
"""Schema for updating user LLM preferences"""
|
||||
"""Schema for updating search space LLM preferences"""
|
||||
|
||||
long_context_llm_id: int | None = None
|
||||
fast_llm_id: int | None = None
|
||||
|
|
@ -76,7 +31,7 @@ class LLMPreferencesUpdate(BaseModel):
|
|||
|
||||
|
||||
class LLMPreferencesRead(BaseModel):
|
||||
"""Schema for reading user LLM preferences"""
|
||||
"""Schema for reading search space LLM preferences"""
|
||||
|
||||
long_context_llm_id: int | None = None
|
||||
fast_llm_id: int | None = None
|
||||
|
|
@ -144,10 +99,19 @@ async def create_llm_config(
|
|||
session: AsyncSession = Depends(get_async_session),
|
||||
user: User = Depends(current_active_user),
|
||||
):
|
||||
"""Create a new LLM configuration for a search space"""
|
||||
"""
|
||||
Create a new LLM configuration for a search space.
|
||||
Requires LLM_CONFIGS_CREATE permission.
|
||||
"""
|
||||
try:
|
||||
# Verify user has access to the search space
|
||||
await check_search_space_access(session, llm_config.search_space_id, user)
|
||||
# Verify user has permission to create LLM configs
|
||||
await check_permission(
|
||||
session,
|
||||
user,
|
||||
llm_config.search_space_id,
|
||||
Permission.LLM_CONFIGS_CREATE.value,
|
||||
"You don't have permission to create LLM configurations in this search space",
|
||||
)
|
||||
|
||||
# Validate the LLM configuration by making a test API call
|
||||
is_valid, error_message = await validate_llm_config(
|
||||
|
|
@ -187,10 +151,19 @@ async def read_llm_configs(
|
|||
session: AsyncSession = Depends(get_async_session),
|
||||
user: User = Depends(current_active_user),
|
||||
):
|
||||
"""Get all LLM configurations for a search space"""
|
||||
"""
|
||||
Get all LLM configurations for a search space.
|
||||
Requires LLM_CONFIGS_READ permission.
|
||||
"""
|
||||
try:
|
||||
# Verify user has access to the search space
|
||||
await check_search_space_access(session, search_space_id, user)
|
||||
# Verify user has permission to read LLM configs
|
||||
await check_permission(
|
||||
session,
|
||||
user,
|
||||
search_space_id,
|
||||
Permission.LLM_CONFIGS_READ.value,
|
||||
"You don't have permission to view LLM configurations in this search space",
|
||||
)
|
||||
|
||||
result = await session.execute(
|
||||
select(LLMConfig)
|
||||
|
|
@ -213,7 +186,10 @@ async def read_llm_config(
|
|||
session: AsyncSession = Depends(get_async_session),
|
||||
user: User = Depends(current_active_user),
|
||||
):
|
||||
"""Get a specific LLM configuration by ID"""
|
||||
"""
|
||||
Get a specific LLM configuration by ID.
|
||||
Requires LLM_CONFIGS_READ permission.
|
||||
"""
|
||||
try:
|
||||
# Get the LLM config
|
||||
result = await session.execute(
|
||||
|
|
@ -224,8 +200,14 @@ async def read_llm_config(
|
|||
if not llm_config:
|
||||
raise HTTPException(status_code=404, detail="LLM configuration not found")
|
||||
|
||||
# Verify user has access to the search space
|
||||
await check_search_space_access(session, llm_config.search_space_id, user)
|
||||
# Verify user has permission to read LLM configs
|
||||
await check_permission(
|
||||
session,
|
||||
user,
|
||||
llm_config.search_space_id,
|
||||
Permission.LLM_CONFIGS_READ.value,
|
||||
"You don't have permission to view LLM configurations in this search space",
|
||||
)
|
||||
|
||||
return llm_config
|
||||
except HTTPException:
|
||||
|
|
@ -243,7 +225,10 @@ async def update_llm_config(
|
|||
session: AsyncSession = Depends(get_async_session),
|
||||
user: User = Depends(current_active_user),
|
||||
):
|
||||
"""Update an existing LLM configuration"""
|
||||
"""
|
||||
Update an existing LLM configuration.
|
||||
Requires LLM_CONFIGS_UPDATE permission.
|
||||
"""
|
||||
try:
|
||||
# Get the LLM config
|
||||
result = await session.execute(
|
||||
|
|
@ -254,8 +239,14 @@ async def update_llm_config(
|
|||
if not db_llm_config:
|
||||
raise HTTPException(status_code=404, detail="LLM configuration not found")
|
||||
|
||||
# Verify user has access to the search space
|
||||
await check_search_space_access(session, db_llm_config.search_space_id, user)
|
||||
# Verify user has permission to update LLM configs
|
||||
await check_permission(
|
||||
session,
|
||||
user,
|
||||
db_llm_config.search_space_id,
|
||||
Permission.LLM_CONFIGS_UPDATE.value,
|
||||
"You don't have permission to update LLM configurations in this search space",
|
||||
)
|
||||
|
||||
update_data = llm_config_update.model_dump(exclude_unset=True)
|
||||
|
||||
|
|
@ -311,7 +302,10 @@ async def delete_llm_config(
|
|||
session: AsyncSession = Depends(get_async_session),
|
||||
user: User = Depends(current_active_user),
|
||||
):
|
||||
"""Delete an LLM configuration"""
|
||||
"""
|
||||
Delete an LLM configuration.
|
||||
Requires LLM_CONFIGS_DELETE permission.
|
||||
"""
|
||||
try:
|
||||
# Get the LLM config
|
||||
result = await session.execute(
|
||||
|
|
@ -322,8 +316,14 @@ async def delete_llm_config(
|
|||
if not db_llm_config:
|
||||
raise HTTPException(status_code=404, detail="LLM configuration not found")
|
||||
|
||||
# Verify user has access to the search space
|
||||
await check_search_space_access(session, db_llm_config.search_space_id, user)
|
||||
# Verify user has permission to delete LLM configs
|
||||
await check_permission(
|
||||
session,
|
||||
user,
|
||||
db_llm_config.search_space_id,
|
||||
Permission.LLM_CONFIGS_DELETE.value,
|
||||
"You don't have permission to delete LLM configurations in this search space",
|
||||
)
|
||||
|
||||
await session.delete(db_llm_config)
|
||||
await session.commit()
|
||||
|
|
@ -337,28 +337,42 @@ async def delete_llm_config(
|
|||
) from e
|
||||
|
||||
|
||||
# User LLM Preferences endpoints
|
||||
# Search Space LLM Preferences endpoints
|
||||
|
||||
|
||||
@router.get(
|
||||
"/search-spaces/{search_space_id}/llm-preferences",
|
||||
response_model=LLMPreferencesRead,
|
||||
)
|
||||
async def get_user_llm_preferences(
|
||||
async def get_llm_preferences(
|
||||
search_space_id: int,
|
||||
session: AsyncSession = Depends(get_async_session),
|
||||
user: User = Depends(current_active_user),
|
||||
):
|
||||
"""Get the current user's LLM preferences for a specific search space"""
|
||||
"""
|
||||
Get the LLM preferences for a specific search space.
|
||||
LLM preferences are shared by all members of the search space.
|
||||
Requires LLM_CONFIGS_READ permission.
|
||||
"""
|
||||
try:
|
||||
# Verify user has access to the search space
|
||||
await check_search_space_access(session, search_space_id, user)
|
||||
|
||||
# Get or create user preference for this search space
|
||||
preference = await get_or_create_user_preference(
|
||||
session, user.id, search_space_id
|
||||
# Verify user has permission to read LLM configs
|
||||
await check_permission(
|
||||
session,
|
||||
user,
|
||||
search_space_id,
|
||||
Permission.LLM_CONFIGS_READ.value,
|
||||
"You don't have permission to view LLM preferences in this search space",
|
||||
)
|
||||
|
||||
# Get the search space
|
||||
result = await session.execute(
|
||||
select(SearchSpace).filter(SearchSpace.id == search_space_id)
|
||||
)
|
||||
search_space = result.scalars().first()
|
||||
|
||||
if not search_space:
|
||||
raise HTTPException(status_code=404, detail="Search space not found")
|
||||
|
||||
# Helper function to get config (global or custom)
|
||||
async def get_config_for_id(config_id):
|
||||
if config_id is None:
|
||||
|
|
@ -391,14 +405,14 @@ async def get_user_llm_preferences(
|
|||
return result.scalars().first()
|
||||
|
||||
# Get the configs (from DB for custom, or constructed for global)
|
||||
long_context_llm = await get_config_for_id(preference.long_context_llm_id)
|
||||
fast_llm = await get_config_for_id(preference.fast_llm_id)
|
||||
strategic_llm = await get_config_for_id(preference.strategic_llm_id)
|
||||
long_context_llm = await get_config_for_id(search_space.long_context_llm_id)
|
||||
fast_llm = await get_config_for_id(search_space.fast_llm_id)
|
||||
strategic_llm = await get_config_for_id(search_space.strategic_llm_id)
|
||||
|
||||
return {
|
||||
"long_context_llm_id": preference.long_context_llm_id,
|
||||
"fast_llm_id": preference.fast_llm_id,
|
||||
"strategic_llm_id": preference.strategic_llm_id,
|
||||
"long_context_llm_id": search_space.long_context_llm_id,
|
||||
"fast_llm_id": search_space.fast_llm_id,
|
||||
"strategic_llm_id": search_space.strategic_llm_id,
|
||||
"long_context_llm": long_context_llm,
|
||||
"fast_llm": fast_llm,
|
||||
"strategic_llm": strategic_llm,
|
||||
|
|
@ -415,22 +429,37 @@ async def get_user_llm_preferences(
|
|||
"/search-spaces/{search_space_id}/llm-preferences",
|
||||
response_model=LLMPreferencesRead,
|
||||
)
|
||||
async def update_user_llm_preferences(
|
||||
async def update_llm_preferences(
|
||||
search_space_id: int,
|
||||
preferences: LLMPreferencesUpdate,
|
||||
session: AsyncSession = Depends(get_async_session),
|
||||
user: User = Depends(current_active_user),
|
||||
):
|
||||
"""Update the current user's LLM preferences for a specific search space"""
|
||||
"""
|
||||
Update the LLM preferences for a specific search space.
|
||||
LLM preferences are shared by all members of the search space.
|
||||
Requires SETTINGS_UPDATE permission (only users with settings access can change).
|
||||
"""
|
||||
try:
|
||||
# Verify user has access to the search space
|
||||
await check_search_space_access(session, search_space_id, user)
|
||||
|
||||
# Get or create user preference for this search space
|
||||
preference = await get_or_create_user_preference(
|
||||
session, user.id, search_space_id
|
||||
# Verify user has permission to update settings (not just LLM configs)
|
||||
# This ensures only users with settings access can change shared LLM preferences
|
||||
await check_permission(
|
||||
session,
|
||||
user,
|
||||
search_space_id,
|
||||
Permission.SETTINGS_UPDATE.value,
|
||||
"You don't have permission to update LLM preferences in this search space",
|
||||
)
|
||||
|
||||
# Get the search space
|
||||
result = await session.execute(
|
||||
select(SearchSpace).filter(SearchSpace.id == search_space_id)
|
||||
)
|
||||
search_space = result.scalars().first()
|
||||
|
||||
if not search_space:
|
||||
raise HTTPException(status_code=404, detail="Search space not found")
|
||||
|
||||
# Validate that all provided LLM config IDs belong to the search space
|
||||
update_data = preferences.model_dump(exclude_unset=True)
|
||||
|
||||
|
|
@ -485,18 +514,13 @@ async def update_user_llm_preferences(
|
|||
f"Multiple languages detected in LLM selection for search_space {search_space_id}: {languages}. "
|
||||
"This may affect response quality."
|
||||
)
|
||||
# Don't raise an exception - allow users to proceed
|
||||
# raise HTTPException(
|
||||
# status_code=400,
|
||||
# detail="All selected LLM configurations must have the same language setting",
|
||||
# )
|
||||
|
||||
# Update user preferences
|
||||
# Update search space LLM preferences
|
||||
for key, value in update_data.items():
|
||||
setattr(preference, key, value)
|
||||
setattr(search_space, key, value)
|
||||
|
||||
await session.commit()
|
||||
await session.refresh(preference)
|
||||
await session.refresh(search_space)
|
||||
|
||||
# Helper function to get config (global or custom)
|
||||
async def get_config_for_id(config_id):
|
||||
|
|
@ -530,15 +554,15 @@ async def update_user_llm_preferences(
|
|||
return result.scalars().first()
|
||||
|
||||
# Get the configs (from DB for custom, or constructed for global)
|
||||
long_context_llm = await get_config_for_id(preference.long_context_llm_id)
|
||||
fast_llm = await get_config_for_id(preference.fast_llm_id)
|
||||
strategic_llm = await get_config_for_id(preference.strategic_llm_id)
|
||||
long_context_llm = await get_config_for_id(search_space.long_context_llm_id)
|
||||
fast_llm = await get_config_for_id(search_space.fast_llm_id)
|
||||
strategic_llm = await get_config_for_id(search_space.strategic_llm_id)
|
||||
|
||||
# Return updated preferences
|
||||
return {
|
||||
"long_context_llm_id": preference.long_context_llm_id,
|
||||
"fast_llm_id": preference.fast_llm_id,
|
||||
"strategic_llm_id": preference.strategic_llm_id,
|
||||
"long_context_llm_id": search_space.long_context_llm_id,
|
||||
"fast_llm_id": search_space.fast_llm_id,
|
||||
"strategic_llm_id": search_space.strategic_llm_id,
|
||||
"long_context_llm": long_context_llm,
|
||||
"fast_llm": fast_llm,
|
||||
"strategic_llm": strategic_llm,
|
||||
|
|
|
|||
|
|
@ -5,10 +5,19 @@ from sqlalchemy import and_, desc
|
|||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy.future import select
|
||||
|
||||
from app.db import Log, LogLevel, LogStatus, SearchSpace, User, get_async_session
|
||||
from app.db import (
|
||||
Log,
|
||||
LogLevel,
|
||||
LogStatus,
|
||||
Permission,
|
||||
SearchSpace,
|
||||
SearchSpaceMembership,
|
||||
User,
|
||||
get_async_session,
|
||||
)
|
||||
from app.schemas import LogCreate, LogRead, LogUpdate
|
||||
from app.users import current_active_user
|
||||
from app.utils.check_ownership import check_ownership
|
||||
from app.utils.rbac import check_permission
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
|
@ -19,10 +28,19 @@ async def create_log(
|
|||
session: AsyncSession = Depends(get_async_session),
|
||||
user: User = Depends(current_active_user),
|
||||
):
|
||||
"""Create a new log entry."""
|
||||
"""
|
||||
Create a new log entry.
|
||||
Note: This is typically called internally. Requires LOGS_READ permission (since logs are usually system-generated).
|
||||
"""
|
||||
try:
|
||||
# Check if the user owns the search space
|
||||
await check_ownership(session, SearchSpace, log.search_space_id, user)
|
||||
# Check if the user has access to the search space
|
||||
await check_permission(
|
||||
session,
|
||||
user,
|
||||
log.search_space_id,
|
||||
Permission.LOGS_READ.value,
|
||||
"You don't have permission to access logs in this search space",
|
||||
)
|
||||
|
||||
db_log = Log(**log.model_dump())
|
||||
session.add(db_log)
|
||||
|
|
@ -51,22 +69,38 @@ async def read_logs(
|
|||
session: AsyncSession = Depends(get_async_session),
|
||||
user: User = Depends(current_active_user),
|
||||
):
|
||||
"""Get logs with optional filtering."""
|
||||
"""
|
||||
Get logs with optional filtering.
|
||||
Requires LOGS_READ permission for the search space(s).
|
||||
"""
|
||||
try:
|
||||
# Build base query - only logs from user's search spaces
|
||||
query = (
|
||||
select(Log)
|
||||
.join(SearchSpace)
|
||||
.filter(SearchSpace.user_id == user.id)
|
||||
.order_by(desc(Log.created_at)) # Most recent first
|
||||
)
|
||||
|
||||
# Apply filters
|
||||
filters = []
|
||||
|
||||
if search_space_id is not None:
|
||||
await check_ownership(session, SearchSpace, search_space_id, user)
|
||||
filters.append(Log.search_space_id == search_space_id)
|
||||
# Check permission for specific search space
|
||||
await check_permission(
|
||||
session,
|
||||
user,
|
||||
search_space_id,
|
||||
Permission.LOGS_READ.value,
|
||||
"You don't have permission to read logs in this search space",
|
||||
)
|
||||
# Build query for specific search space
|
||||
query = (
|
||||
select(Log)
|
||||
.filter(Log.search_space_id == search_space_id)
|
||||
.order_by(desc(Log.created_at))
|
||||
)
|
||||
else:
|
||||
# Build base query - logs from search spaces user has membership in
|
||||
query = (
|
||||
select(Log)
|
||||
.join(SearchSpace)
|
||||
.join(SearchSpaceMembership)
|
||||
.filter(SearchSpaceMembership.user_id == user.id)
|
||||
.order_by(desc(Log.created_at))
|
||||
)
|
||||
|
||||
if level is not None:
|
||||
filters.append(Log.level == level)
|
||||
|
|
@ -104,19 +138,26 @@ async def read_log(
|
|||
session: AsyncSession = Depends(get_async_session),
|
||||
user: User = Depends(current_active_user),
|
||||
):
|
||||
"""Get a specific log by ID."""
|
||||
"""
|
||||
Get a specific log by ID.
|
||||
Requires LOGS_READ permission for the search space.
|
||||
"""
|
||||
try:
|
||||
# Get log and verify user owns the search space
|
||||
result = await session.execute(
|
||||
select(Log)
|
||||
.join(SearchSpace)
|
||||
.filter(Log.id == log_id, SearchSpace.user_id == user.id)
|
||||
)
|
||||
result = await session.execute(select(Log).filter(Log.id == log_id))
|
||||
log = result.scalars().first()
|
||||
|
||||
if not log:
|
||||
raise HTTPException(status_code=404, detail="Log not found")
|
||||
|
||||
# Check permission for the search space
|
||||
await check_permission(
|
||||
session,
|
||||
user,
|
||||
log.search_space_id,
|
||||
Permission.LOGS_READ.value,
|
||||
"You don't have permission to read logs in this search space",
|
||||
)
|
||||
|
||||
return log
|
||||
except HTTPException:
|
||||
raise
|
||||
|
|
@ -133,19 +174,26 @@ async def update_log(
|
|||
session: AsyncSession = Depends(get_async_session),
|
||||
user: User = Depends(current_active_user),
|
||||
):
|
||||
"""Update a log entry."""
|
||||
"""
|
||||
Update a log entry.
|
||||
Requires LOGS_READ permission (logs are typically updated by system).
|
||||
"""
|
||||
try:
|
||||
# Get log and verify user owns the search space
|
||||
result = await session.execute(
|
||||
select(Log)
|
||||
.join(SearchSpace)
|
||||
.filter(Log.id == log_id, SearchSpace.user_id == user.id)
|
||||
)
|
||||
result = await session.execute(select(Log).filter(Log.id == log_id))
|
||||
db_log = result.scalars().first()
|
||||
|
||||
if not db_log:
|
||||
raise HTTPException(status_code=404, detail="Log not found")
|
||||
|
||||
# Check permission for the search space
|
||||
await check_permission(
|
||||
session,
|
||||
user,
|
||||
db_log.search_space_id,
|
||||
Permission.LOGS_READ.value,
|
||||
"You don't have permission to access logs in this search space",
|
||||
)
|
||||
|
||||
# Update only provided fields
|
||||
update_data = log_update.model_dump(exclude_unset=True)
|
||||
for field, value in update_data.items():
|
||||
|
|
@ -169,19 +217,26 @@ async def delete_log(
|
|||
session: AsyncSession = Depends(get_async_session),
|
||||
user: User = Depends(current_active_user),
|
||||
):
|
||||
"""Delete a log entry."""
|
||||
"""
|
||||
Delete a log entry.
|
||||
Requires LOGS_DELETE permission for the search space.
|
||||
"""
|
||||
try:
|
||||
# Get log and verify user owns the search space
|
||||
result = await session.execute(
|
||||
select(Log)
|
||||
.join(SearchSpace)
|
||||
.filter(Log.id == log_id, SearchSpace.user_id == user.id)
|
||||
)
|
||||
result = await session.execute(select(Log).filter(Log.id == log_id))
|
||||
db_log = result.scalars().first()
|
||||
|
||||
if not db_log:
|
||||
raise HTTPException(status_code=404, detail="Log not found")
|
||||
|
||||
# Check permission for the search space
|
||||
await check_permission(
|
||||
session,
|
||||
user,
|
||||
db_log.search_space_id,
|
||||
Permission.LOGS_DELETE.value,
|
||||
"You don't have permission to delete logs in this search space",
|
||||
)
|
||||
|
||||
await session.delete(db_log)
|
||||
await session.commit()
|
||||
return {"message": "Log deleted successfully"}
|
||||
|
|
@ -201,10 +256,19 @@ async def get_logs_summary(
|
|||
session: AsyncSession = Depends(get_async_session),
|
||||
user: User = Depends(current_active_user),
|
||||
):
|
||||
"""Get a summary of logs for a search space in the last X hours."""
|
||||
"""
|
||||
Get a summary of logs for a search space in the last X hours.
|
||||
Requires LOGS_READ permission for the search space.
|
||||
"""
|
||||
try:
|
||||
# Check ownership
|
||||
await check_ownership(session, SearchSpace, search_space_id, user)
|
||||
# Check permission
|
||||
await check_permission(
|
||||
session,
|
||||
user,
|
||||
search_space_id,
|
||||
Permission.LOGS_READ.value,
|
||||
"You don't have permission to read logs in this search space",
|
||||
)
|
||||
|
||||
# Calculate time window
|
||||
since = datetime.utcnow().replace(microsecond=0) - timedelta(hours=hours)
|
||||
|
|
|
|||
|
|
@ -7,7 +7,15 @@ from sqlalchemy.exc import IntegrityError, SQLAlchemyError
|
|||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy.future import select
|
||||
|
||||
from app.db import Chat, Podcast, SearchSpace, User, get_async_session
|
||||
from app.db import (
|
||||
Chat,
|
||||
Permission,
|
||||
Podcast,
|
||||
SearchSpace,
|
||||
SearchSpaceMembership,
|
||||
User,
|
||||
get_async_session,
|
||||
)
|
||||
from app.schemas import (
|
||||
PodcastCreate,
|
||||
PodcastGenerateRequest,
|
||||
|
|
@ -16,7 +24,7 @@ from app.schemas import (
|
|||
)
|
||||
from app.tasks.podcast_tasks import generate_chat_podcast
|
||||
from app.users import current_active_user
|
||||
from app.utils.check_ownership import check_ownership
|
||||
from app.utils.rbac import check_permission
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
|
@ -27,8 +35,18 @@ async def create_podcast(
|
|||
session: AsyncSession = Depends(get_async_session),
|
||||
user: User = Depends(current_active_user),
|
||||
):
|
||||
"""
|
||||
Create a new podcast.
|
||||
Requires PODCASTS_CREATE permission.
|
||||
"""
|
||||
try:
|
||||
await check_ownership(session, SearchSpace, podcast.search_space_id, user)
|
||||
await check_permission(
|
||||
session,
|
||||
user,
|
||||
podcast.search_space_id,
|
||||
Permission.PODCASTS_CREATE.value,
|
||||
"You don't have permission to create podcasts in this search space",
|
||||
)
|
||||
db_podcast = Podcast(**podcast.model_dump())
|
||||
session.add(db_podcast)
|
||||
await session.commit()
|
||||
|
|
@ -58,20 +76,45 @@ async def create_podcast(
|
|||
async def read_podcasts(
|
||||
skip: int = 0,
|
||||
limit: int = 100,
|
||||
search_space_id: int | None = None,
|
||||
session: AsyncSession = Depends(get_async_session),
|
||||
user: User = Depends(current_active_user),
|
||||
):
|
||||
"""
|
||||
List podcasts the user has access to.
|
||||
Requires PODCASTS_READ permission for the search space(s).
|
||||
"""
|
||||
if skip < 0 or limit < 1:
|
||||
raise HTTPException(status_code=400, detail="Invalid pagination parameters")
|
||||
try:
|
||||
result = await session.execute(
|
||||
select(Podcast)
|
||||
.join(SearchSpace)
|
||||
.filter(SearchSpace.user_id == user.id)
|
||||
.offset(skip)
|
||||
.limit(limit)
|
||||
)
|
||||
if search_space_id is not None:
|
||||
# Check permission for specific search space
|
||||
await check_permission(
|
||||
session,
|
||||
user,
|
||||
search_space_id,
|
||||
Permission.PODCASTS_READ.value,
|
||||
"You don't have permission to read podcasts in this search space",
|
||||
)
|
||||
result = await session.execute(
|
||||
select(Podcast)
|
||||
.filter(Podcast.search_space_id == search_space_id)
|
||||
.offset(skip)
|
||||
.limit(limit)
|
||||
)
|
||||
else:
|
||||
# Get podcasts from all search spaces user has membership in
|
||||
result = await session.execute(
|
||||
select(Podcast)
|
||||
.join(SearchSpace)
|
||||
.join(SearchSpaceMembership)
|
||||
.filter(SearchSpaceMembership.user_id == user.id)
|
||||
.offset(skip)
|
||||
.limit(limit)
|
||||
)
|
||||
return result.scalars().all()
|
||||
except HTTPException:
|
||||
raise
|
||||
except SQLAlchemyError:
|
||||
raise HTTPException(
|
||||
status_code=500, detail="Database error occurred while fetching podcasts"
|
||||
|
|
@ -84,18 +127,29 @@ async def read_podcast(
|
|||
session: AsyncSession = Depends(get_async_session),
|
||||
user: User = Depends(current_active_user),
|
||||
):
|
||||
"""
|
||||
Get a specific podcast by ID.
|
||||
Requires PODCASTS_READ permission for the search space.
|
||||
"""
|
||||
try:
|
||||
result = await session.execute(
|
||||
select(Podcast)
|
||||
.join(SearchSpace)
|
||||
.filter(Podcast.id == podcast_id, SearchSpace.user_id == user.id)
|
||||
)
|
||||
result = await session.execute(select(Podcast).filter(Podcast.id == podcast_id))
|
||||
podcast = result.scalars().first()
|
||||
|
||||
if not podcast:
|
||||
raise HTTPException(
|
||||
status_code=404,
|
||||
detail="Podcast not found or you don't have permission to access it",
|
||||
detail="Podcast not found",
|
||||
)
|
||||
|
||||
# Check permission for the search space
|
||||
await check_permission(
|
||||
session,
|
||||
user,
|
||||
podcast.search_space_id,
|
||||
Permission.PODCASTS_READ.value,
|
||||
"You don't have permission to read podcasts in this search space",
|
||||
)
|
||||
|
||||
return podcast
|
||||
except HTTPException as he:
|
||||
raise he
|
||||
|
|
@ -112,8 +166,26 @@ async def update_podcast(
|
|||
session: AsyncSession = Depends(get_async_session),
|
||||
user: User = Depends(current_active_user),
|
||||
):
|
||||
"""
|
||||
Update a podcast.
|
||||
Requires PODCASTS_UPDATE permission for the search space.
|
||||
"""
|
||||
try:
|
||||
db_podcast = await read_podcast(podcast_id, session, user)
|
||||
result = await session.execute(select(Podcast).filter(Podcast.id == podcast_id))
|
||||
db_podcast = result.scalars().first()
|
||||
|
||||
if not db_podcast:
|
||||
raise HTTPException(status_code=404, detail="Podcast not found")
|
||||
|
||||
# Check permission for the search space
|
||||
await check_permission(
|
||||
session,
|
||||
user,
|
||||
db_podcast.search_space_id,
|
||||
Permission.PODCASTS_UPDATE.value,
|
||||
"You don't have permission to update podcasts in this search space",
|
||||
)
|
||||
|
||||
update_data = podcast_update.model_dump(exclude_unset=True)
|
||||
for key, value in update_data.items():
|
||||
setattr(db_podcast, key, value)
|
||||
|
|
@ -140,8 +212,26 @@ async def delete_podcast(
|
|||
session: AsyncSession = Depends(get_async_session),
|
||||
user: User = Depends(current_active_user),
|
||||
):
|
||||
"""
|
||||
Delete a podcast.
|
||||
Requires PODCASTS_DELETE permission for the search space.
|
||||
"""
|
||||
try:
|
||||
db_podcast = await read_podcast(podcast_id, session, user)
|
||||
result = await session.execute(select(Podcast).filter(Podcast.id == podcast_id))
|
||||
db_podcast = result.scalars().first()
|
||||
|
||||
if not db_podcast:
|
||||
raise HTTPException(status_code=404, detail="Podcast not found")
|
||||
|
||||
# Check permission for the search space
|
||||
await check_permission(
|
||||
session,
|
||||
user,
|
||||
db_podcast.search_space_id,
|
||||
Permission.PODCASTS_DELETE.value,
|
||||
"You don't have permission to delete podcasts in this search space",
|
||||
)
|
||||
|
||||
await session.delete(db_podcast)
|
||||
await session.commit()
|
||||
return {"message": "Podcast deleted successfully"}
|
||||
|
|
@ -181,9 +271,19 @@ async def generate_podcast(
|
|||
session: AsyncSession = Depends(get_async_session),
|
||||
user: User = Depends(current_active_user),
|
||||
):
|
||||
"""
|
||||
Generate a podcast from a chat or document.
|
||||
Requires PODCASTS_CREATE permission.
|
||||
"""
|
||||
try:
|
||||
# Check if the user owns the search space
|
||||
await check_ownership(session, SearchSpace, request.search_space_id, user)
|
||||
# Check if the user has permission to create podcasts
|
||||
await check_permission(
|
||||
session,
|
||||
user,
|
||||
request.search_space_id,
|
||||
Permission.PODCASTS_CREATE.value,
|
||||
"You don't have permission to create podcasts in this search space",
|
||||
)
|
||||
|
||||
if request.type == "CHAT":
|
||||
# Verify that all chat IDs belong to this user and search space
|
||||
|
|
@ -251,22 +351,29 @@ async def stream_podcast(
|
|||
session: AsyncSession = Depends(get_async_session),
|
||||
user: User = Depends(current_active_user),
|
||||
):
|
||||
"""Stream a podcast audio file."""
|
||||
"""
|
||||
Stream a podcast audio file.
|
||||
Requires PODCASTS_READ permission for the search space.
|
||||
"""
|
||||
try:
|
||||
# Get the podcast and check if user has access
|
||||
result = await session.execute(
|
||||
select(Podcast)
|
||||
.join(SearchSpace)
|
||||
.filter(Podcast.id == podcast_id, SearchSpace.user_id == user.id)
|
||||
)
|
||||
result = await session.execute(select(Podcast).filter(Podcast.id == podcast_id))
|
||||
podcast = result.scalars().first()
|
||||
|
||||
if not podcast:
|
||||
raise HTTPException(
|
||||
status_code=404,
|
||||
detail="Podcast not found or you don't have permission to access it",
|
||||
detail="Podcast not found",
|
||||
)
|
||||
|
||||
# Check permission for the search space
|
||||
await check_permission(
|
||||
session,
|
||||
user,
|
||||
podcast.search_space_id,
|
||||
Permission.PODCASTS_READ.value,
|
||||
"You don't have permission to access podcasts in this search space",
|
||||
)
|
||||
|
||||
# Get the file path
|
||||
file_path = podcast.file_location
|
||||
|
||||
|
|
@ -303,12 +410,30 @@ async def get_podcast_by_chat_id(
|
|||
session: AsyncSession = Depends(get_async_session),
|
||||
user: User = Depends(current_active_user),
|
||||
):
|
||||
"""
|
||||
Get a podcast by its associated chat ID.
|
||||
Requires PODCASTS_READ permission for the search space.
|
||||
"""
|
||||
try:
|
||||
# Get the podcast and check if user has access
|
||||
# First get the chat to find its search space
|
||||
chat_result = await session.execute(select(Chat).filter(Chat.id == chat_id))
|
||||
chat = chat_result.scalars().first()
|
||||
|
||||
if not chat:
|
||||
return None
|
||||
|
||||
# Check permission for the search space
|
||||
await check_permission(
|
||||
session,
|
||||
user,
|
||||
chat.search_space_id,
|
||||
Permission.PODCASTS_READ.value,
|
||||
"You don't have permission to read podcasts in this search space",
|
||||
)
|
||||
|
||||
# Get the podcast
|
||||
result = await session.execute(
|
||||
select(Podcast)
|
||||
.join(SearchSpace)
|
||||
.filter(Podcast.chat_id == chat_id, SearchSpace.user_id == user.id)
|
||||
select(Podcast).filter(Podcast.chat_id == chat_id)
|
||||
)
|
||||
podcast = result.scalars().first()
|
||||
|
||||
|
|
|
|||
1084
surfsense_backend/app/routes/rbac_routes.py
Normal file
1084
surfsense_backend/app/routes/rbac_routes.py
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -22,9 +22,9 @@ from sqlalchemy.future import select
|
|||
|
||||
from app.connectors.github_connector import GitHubConnector
|
||||
from app.db import (
|
||||
Permission,
|
||||
SearchSourceConnector,
|
||||
SearchSourceConnectorType,
|
||||
SearchSpace,
|
||||
User,
|
||||
async_session_maker,
|
||||
get_async_session,
|
||||
|
|
@ -39,6 +39,7 @@ from app.tasks.connector_indexers import (
|
|||
index_airtable_records,
|
||||
index_clickup_tasks,
|
||||
index_confluence_pages,
|
||||
index_crawled_urls,
|
||||
index_discord_messages,
|
||||
index_elasticsearch_documents,
|
||||
index_github_repos,
|
||||
|
|
@ -51,12 +52,12 @@ from app.tasks.connector_indexers import (
|
|||
index_slack_messages,
|
||||
)
|
||||
from app.users import current_active_user
|
||||
from app.utils.check_ownership import check_ownership
|
||||
from app.utils.periodic_scheduler import (
|
||||
create_periodic_schedule,
|
||||
delete_periodic_schedule,
|
||||
update_periodic_schedule,
|
||||
)
|
||||
from app.utils.rbac import check_permission
|
||||
|
||||
# Set up logging
|
||||
logger = logging.getLogger(__name__)
|
||||
|
|
@ -107,19 +108,25 @@ async def create_search_source_connector(
|
|||
):
|
||||
"""
|
||||
Create a new search source connector.
|
||||
Requires CONNECTORS_CREATE permission.
|
||||
|
||||
Each search space can have only one connector of each type per user (based on search_space_id, user_id, and connector_type).
|
||||
Each search space can have only one connector of each type (based on search_space_id and connector_type).
|
||||
The config must contain the appropriate keys for the connector type.
|
||||
"""
|
||||
try:
|
||||
# Check if the search space belongs to the user
|
||||
await check_ownership(session, SearchSpace, search_space_id, user)
|
||||
# Check if user has permission to create connectors
|
||||
await check_permission(
|
||||
session,
|
||||
user,
|
||||
search_space_id,
|
||||
Permission.CONNECTORS_CREATE.value,
|
||||
"You don't have permission to create connectors in this search space",
|
||||
)
|
||||
|
||||
# Check if a connector with the same type already exists for this search space and user
|
||||
# Check if a connector with the same type already exists for this search space
|
||||
result = await session.execute(
|
||||
select(SearchSourceConnector).filter(
|
||||
SearchSourceConnector.search_space_id == search_space_id,
|
||||
SearchSourceConnector.user_id == user.id,
|
||||
SearchSourceConnector.connector_type == connector.connector_type,
|
||||
)
|
||||
)
|
||||
|
|
@ -127,7 +134,7 @@ async def create_search_source_connector(
|
|||
if existing_connector:
|
||||
raise HTTPException(
|
||||
status_code=409,
|
||||
detail=f"A connector with type {connector.connector_type} already exists in this search space. Each search space can have only one connector of each type per user.",
|
||||
detail=f"A connector with type {connector.connector_type} already exists in this search space.",
|
||||
)
|
||||
|
||||
# Prepare connector data
|
||||
|
|
@ -197,22 +204,34 @@ async def read_search_source_connectors(
|
|||
session: AsyncSession = Depends(get_async_session),
|
||||
user: User = Depends(current_active_user),
|
||||
):
|
||||
"""List all search source connectors for the current user, optionally filtered by search space."""
|
||||
"""
|
||||
List all search source connectors for a search space.
|
||||
Requires CONNECTORS_READ permission.
|
||||
"""
|
||||
try:
|
||||
query = select(SearchSourceConnector).filter(
|
||||
SearchSourceConnector.user_id == user.id
|
||||
if search_space_id is None:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail="search_space_id is required",
|
||||
)
|
||||
|
||||
# Check if user has permission to read connectors
|
||||
await check_permission(
|
||||
session,
|
||||
user,
|
||||
search_space_id,
|
||||
Permission.CONNECTORS_READ.value,
|
||||
"You don't have permission to view connectors in this search space",
|
||||
)
|
||||
|
||||
# Filter by search_space_id if provided
|
||||
if search_space_id is not None:
|
||||
# Verify the search space belongs to the user
|
||||
await check_ownership(session, SearchSpace, search_space_id, user)
|
||||
query = query.filter(
|
||||
SearchSourceConnector.search_space_id == search_space_id
|
||||
)
|
||||
query = select(SearchSourceConnector).filter(
|
||||
SearchSourceConnector.search_space_id == search_space_id
|
||||
)
|
||||
|
||||
result = await session.execute(query.offset(skip).limit(limit))
|
||||
return result.scalars().all()
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
|
|
@ -228,9 +247,32 @@ async def read_search_source_connector(
|
|||
session: AsyncSession = Depends(get_async_session),
|
||||
user: User = Depends(current_active_user),
|
||||
):
|
||||
"""Get a specific search source connector by ID."""
|
||||
"""
|
||||
Get a specific search source connector by ID.
|
||||
Requires CONNECTORS_READ permission.
|
||||
"""
|
||||
try:
|
||||
return await check_ownership(session, SearchSourceConnector, connector_id, user)
|
||||
# Get the connector first
|
||||
result = await session.execute(
|
||||
select(SearchSourceConnector).filter(
|
||||
SearchSourceConnector.id == connector_id
|
||||
)
|
||||
)
|
||||
connector = result.scalars().first()
|
||||
|
||||
if not connector:
|
||||
raise HTTPException(status_code=404, detail="Connector not found")
|
||||
|
||||
# Check permission
|
||||
await check_permission(
|
||||
session,
|
||||
user,
|
||||
connector.search_space_id,
|
||||
Permission.CONNECTORS_READ.value,
|
||||
"You don't have permission to view this connector",
|
||||
)
|
||||
|
||||
return connector
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
|
|
@ -250,10 +292,25 @@ async def update_search_source_connector(
|
|||
):
|
||||
"""
|
||||
Update a search source connector.
|
||||
Requires CONNECTORS_UPDATE permission.
|
||||
Handles partial updates, including merging changes into the 'config' field.
|
||||
"""
|
||||
db_connector = await check_ownership(
|
||||
session, SearchSourceConnector, connector_id, user
|
||||
# Get the connector first
|
||||
result = await session.execute(
|
||||
select(SearchSourceConnector).filter(SearchSourceConnector.id == connector_id)
|
||||
)
|
||||
db_connector = result.scalars().first()
|
||||
|
||||
if not db_connector:
|
||||
raise HTTPException(status_code=404, detail="Connector not found")
|
||||
|
||||
# Check permission
|
||||
await check_permission(
|
||||
session,
|
||||
user,
|
||||
db_connector.search_space_id,
|
||||
Permission.CONNECTORS_UPDATE.value,
|
||||
"You don't have permission to update this connector",
|
||||
)
|
||||
|
||||
# Convert the sparse update data (only fields present in request) to a dict
|
||||
|
|
@ -348,20 +405,19 @@ async def update_search_source_connector(
|
|||
for key, value in update_data.items():
|
||||
# Prevent changing connector_type if it causes a duplicate (check moved here)
|
||||
if key == "connector_type" and value != db_connector.connector_type:
|
||||
result = await session.execute(
|
||||
check_result = await session.execute(
|
||||
select(SearchSourceConnector).filter(
|
||||
SearchSourceConnector.search_space_id
|
||||
== db_connector.search_space_id,
|
||||
SearchSourceConnector.user_id == user.id,
|
||||
SearchSourceConnector.connector_type == value,
|
||||
SearchSourceConnector.id != connector_id,
|
||||
)
|
||||
)
|
||||
existing_connector = result.scalars().first()
|
||||
existing_connector = check_result.scalars().first()
|
||||
if existing_connector:
|
||||
raise HTTPException(
|
||||
status_code=409,
|
||||
detail=f"A connector with type {value} already exists in this search space. Each search space can have only one connector of each type per user.",
|
||||
detail=f"A connector with type {value} already exists in this search space.",
|
||||
)
|
||||
|
||||
setattr(db_connector, key, value)
|
||||
|
|
@ -424,10 +480,29 @@ async def delete_search_source_connector(
|
|||
session: AsyncSession = Depends(get_async_session),
|
||||
user: User = Depends(current_active_user),
|
||||
):
|
||||
"""Delete a search source connector."""
|
||||
"""
|
||||
Delete a search source connector.
|
||||
Requires CONNECTORS_DELETE permission.
|
||||
"""
|
||||
try:
|
||||
db_connector = await check_ownership(
|
||||
session, SearchSourceConnector, connector_id, user
|
||||
# Get the connector first
|
||||
result = await session.execute(
|
||||
select(SearchSourceConnector).filter(
|
||||
SearchSourceConnector.id == connector_id
|
||||
)
|
||||
)
|
||||
db_connector = result.scalars().first()
|
||||
|
||||
if not db_connector:
|
||||
raise HTTPException(status_code=404, detail="Connector not found")
|
||||
|
||||
# Check permission
|
||||
await check_permission(
|
||||
session,
|
||||
user,
|
||||
db_connector.search_space_id,
|
||||
Permission.CONNECTORS_DELETE.value,
|
||||
"You don't have permission to delete this connector",
|
||||
)
|
||||
|
||||
# Delete any periodic schedule associated with this connector
|
||||
|
|
@ -472,6 +547,7 @@ async def index_connector_content(
|
|||
):
|
||||
"""
|
||||
Index content from a connector to a search space.
|
||||
Requires CONNECTORS_UPDATE permission (to trigger indexing).
|
||||
|
||||
Currently supports:
|
||||
- SLACK_CONNECTOR: Indexes messages from all accessible Slack channels
|
||||
|
|
@ -482,24 +558,34 @@ async def index_connector_content(
|
|||
- DISCORD_CONNECTOR: Indexes messages from all accessible Discord channels
|
||||
- LUMA_CONNECTOR: Indexes events from Luma
|
||||
- ELASTICSEARCH_CONNECTOR: Indexes documents from Elasticsearch
|
||||
- WEBCRAWLER_CONNECTOR: Indexes web pages from crawled websites
|
||||
|
||||
Args:
|
||||
connector_id: ID of the connector to use
|
||||
search_space_id: ID of the search space to store indexed content
|
||||
background_tasks: FastAPI background tasks
|
||||
|
||||
Returns:
|
||||
Dictionary with indexing status
|
||||
"""
|
||||
try:
|
||||
# Check if the connector belongs to the user
|
||||
connector = await check_ownership(
|
||||
session, SearchSourceConnector, connector_id, user
|
||||
# Get the connector first
|
||||
result = await session.execute(
|
||||
select(SearchSourceConnector).filter(
|
||||
SearchSourceConnector.id == connector_id
|
||||
)
|
||||
)
|
||||
connector = result.scalars().first()
|
||||
|
||||
# Check if the search space belongs to the user
|
||||
_search_space = await check_ownership(
|
||||
session, SearchSpace, search_space_id, user
|
||||
if not connector:
|
||||
raise HTTPException(status_code=404, detail="Connector not found")
|
||||
|
||||
# Check if user has permission to update connectors (indexing is an update operation)
|
||||
await check_permission(
|
||||
session,
|
||||
user,
|
||||
search_space_id,
|
||||
Permission.CONNECTORS_UPDATE.value,
|
||||
"You don't have permission to index content in this search space",
|
||||
)
|
||||
|
||||
# Handle different connector types
|
||||
|
|
@ -688,6 +774,17 @@ async def index_connector_content(
|
|||
)
|
||||
response_message = "Elasticsearch indexing started in the background."
|
||||
|
||||
elif connector.connector_type == SearchSourceConnectorType.WEBCRAWLER_CONNECTOR:
|
||||
from app.tasks.celery_tasks.connector_tasks import index_crawled_urls_task
|
||||
|
||||
logger.info(
|
||||
f"Triggering web pages indexing for connector {connector_id} into search space {search_space_id} from {indexing_from} to {indexing_to}"
|
||||
)
|
||||
index_crawled_urls_task.delay(
|
||||
connector_id, search_space_id, str(user.id), indexing_from, indexing_to
|
||||
)
|
||||
response_message = "Web page indexing started in the background."
|
||||
|
||||
else:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
|
|
@ -1523,3 +1620,64 @@ async def run_elasticsearch_indexing(
|
|||
f"Critical error in run_elasticsearch_indexing for connector {connector_id}: {e}",
|
||||
exc_info=True,
|
||||
)
|
||||
|
||||
|
||||
# Add new helper functions for crawled web page indexing
|
||||
async def run_web_page_indexing_with_new_session(
|
||||
connector_id: int,
|
||||
search_space_id: int,
|
||||
user_id: str,
|
||||
start_date: str,
|
||||
end_date: str,
|
||||
):
|
||||
"""
|
||||
Create a new session and run the Web page indexing task.
|
||||
This prevents session leaks by creating a dedicated session for the background task.
|
||||
"""
|
||||
async with async_session_maker() as session:
|
||||
await run_web_page_indexing(
|
||||
session, connector_id, search_space_id, user_id, start_date, end_date
|
||||
)
|
||||
|
||||
|
||||
async def run_web_page_indexing(
|
||||
session: AsyncSession,
|
||||
connector_id: int,
|
||||
search_space_id: int,
|
||||
user_id: str,
|
||||
start_date: str,
|
||||
end_date: str,
|
||||
):
|
||||
"""
|
||||
Background task to run Web page indexing.
|
||||
Args:
|
||||
session: Database session
|
||||
connector_id: ID of the webcrawler connector
|
||||
search_space_id: ID of the search space
|
||||
user_id: ID of the user
|
||||
start_date: Start date for indexing
|
||||
end_date: End date for indexing
|
||||
"""
|
||||
try:
|
||||
documents_processed, error_or_warning = await index_crawled_urls(
|
||||
session=session,
|
||||
connector_id=connector_id,
|
||||
search_space_id=search_space_id,
|
||||
user_id=user_id,
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
update_last_indexed=False, # Don't update timestamp in the indexing function
|
||||
)
|
||||
|
||||
# Only update last_indexed_at if indexing was successful (either new docs or updated docs)
|
||||
if documents_processed > 0:
|
||||
await update_connector_last_indexed(session, connector_id)
|
||||
logger.info(
|
||||
f"Web page indexing completed successfully: {documents_processed} documents processed"
|
||||
)
|
||||
else:
|
||||
logger.error(
|
||||
f"Web page indexing failed or no documents processed: {error_or_warning}"
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Error in background Web page indexing task: {e!s}")
|
||||
|
|
|
|||
|
|
@ -1,18 +1,77 @@
|
|||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
import yaml
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from sqlalchemy import func
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy.future import select
|
||||
|
||||
from app.db import SearchSpace, User, get_async_session
|
||||
from app.schemas import SearchSpaceCreate, SearchSpaceRead, SearchSpaceUpdate
|
||||
from app.db import (
|
||||
Permission,
|
||||
SearchSpace,
|
||||
SearchSpaceMembership,
|
||||
SearchSpaceRole,
|
||||
User,
|
||||
get_async_session,
|
||||
get_default_roles_config,
|
||||
)
|
||||
from app.schemas import (
|
||||
SearchSpaceCreate,
|
||||
SearchSpaceRead,
|
||||
SearchSpaceUpdate,
|
||||
SearchSpaceWithStats,
|
||||
)
|
||||
from app.users import current_active_user
|
||||
from app.utils.check_ownership import check_ownership
|
||||
from app.utils.rbac import check_permission, check_search_space_access
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
async def create_default_roles_and_membership(
|
||||
session: AsyncSession,
|
||||
search_space_id: int,
|
||||
owner_user_id,
|
||||
) -> None:
|
||||
"""
|
||||
Create default system roles for a search space and add the owner as a member.
|
||||
|
||||
Args:
|
||||
session: Database session
|
||||
search_space_id: The ID of the newly created search space
|
||||
owner_user_id: The UUID of the user who created the search space
|
||||
"""
|
||||
# Create default roles
|
||||
default_roles = get_default_roles_config()
|
||||
owner_role_id = None
|
||||
|
||||
for role_config in default_roles:
|
||||
db_role = SearchSpaceRole(
|
||||
name=role_config["name"],
|
||||
description=role_config["description"],
|
||||
permissions=role_config["permissions"],
|
||||
is_default=role_config["is_default"],
|
||||
is_system_role=role_config["is_system_role"],
|
||||
search_space_id=search_space_id,
|
||||
)
|
||||
session.add(db_role)
|
||||
await session.flush() # Get the ID
|
||||
|
||||
if role_config["name"] == "Owner":
|
||||
owner_role_id = db_role.id
|
||||
|
||||
# Create owner membership
|
||||
owner_membership = SearchSpaceMembership(
|
||||
user_id=owner_user_id,
|
||||
search_space_id=search_space_id,
|
||||
role_id=owner_role_id,
|
||||
is_owner=True,
|
||||
)
|
||||
session.add(owner_membership)
|
||||
|
||||
|
||||
@router.post("/searchspaces", response_model=SearchSpaceRead)
|
||||
async def create_search_space(
|
||||
search_space: SearchSpaceCreate,
|
||||
|
|
@ -27,6 +86,11 @@ async def create_search_space(
|
|||
|
||||
db_search_space = SearchSpace(**search_space_data, user_id=user.id)
|
||||
session.add(db_search_space)
|
||||
await session.flush() # Get the search space ID
|
||||
|
||||
# Create default roles and owner membership
|
||||
await create_default_roles_and_membership(session, db_search_space.id, user.id)
|
||||
|
||||
await session.commit()
|
||||
await session.refresh(db_search_space)
|
||||
return db_search_space
|
||||
|
|
@ -34,26 +98,86 @@ async def create_search_space(
|
|||
raise
|
||||
except Exception as e:
|
||||
await session.rollback()
|
||||
logger.error(f"Failed to create search space: {e!s}", exc_info=True)
|
||||
raise HTTPException(
|
||||
status_code=500, detail=f"Failed to create search space: {e!s}"
|
||||
) from e
|
||||
|
||||
|
||||
@router.get("/searchspaces", response_model=list[SearchSpaceRead])
|
||||
@router.get("/searchspaces", response_model=list[SearchSpaceWithStats])
|
||||
async def read_search_spaces(
|
||||
skip: int = 0,
|
||||
limit: int = 200,
|
||||
owned_only: bool = False,
|
||||
session: AsyncSession = Depends(get_async_session),
|
||||
user: User = Depends(current_active_user),
|
||||
):
|
||||
"""
|
||||
Get all search spaces the user has access to, with member count and ownership info.
|
||||
|
||||
Args:
|
||||
skip: Number of items to skip
|
||||
limit: Maximum number of items to return
|
||||
owned_only: If True, only return search spaces owned by the user.
|
||||
If False (default), return all search spaces the user has access to.
|
||||
"""
|
||||
try:
|
||||
result = await session.execute(
|
||||
select(SearchSpace)
|
||||
.filter(SearchSpace.user_id == user.id)
|
||||
.offset(skip)
|
||||
.limit(limit)
|
||||
)
|
||||
return result.scalars().all()
|
||||
if owned_only:
|
||||
# Return only search spaces where user is the original creator (user_id)
|
||||
result = await session.execute(
|
||||
select(SearchSpace)
|
||||
.filter(SearchSpace.user_id == user.id)
|
||||
.offset(skip)
|
||||
.limit(limit)
|
||||
)
|
||||
else:
|
||||
# Return all search spaces the user has membership in
|
||||
result = await session.execute(
|
||||
select(SearchSpace)
|
||||
.join(SearchSpaceMembership)
|
||||
.filter(SearchSpaceMembership.user_id == user.id)
|
||||
.offset(skip)
|
||||
.limit(limit)
|
||||
)
|
||||
|
||||
search_spaces = result.scalars().all()
|
||||
|
||||
# Get member counts and ownership info for each search space
|
||||
search_spaces_with_stats = []
|
||||
for space in search_spaces:
|
||||
# Get member count
|
||||
count_result = await session.execute(
|
||||
select(func.count(SearchSpaceMembership.id)).filter(
|
||||
SearchSpaceMembership.search_space_id == space.id
|
||||
)
|
||||
)
|
||||
member_count = count_result.scalar() or 1
|
||||
|
||||
# Check if current user is owner
|
||||
ownership_result = await session.execute(
|
||||
select(SearchSpaceMembership).filter(
|
||||
SearchSpaceMembership.search_space_id == space.id,
|
||||
SearchSpaceMembership.user_id == user.id,
|
||||
SearchSpaceMembership.is_owner == True, # noqa: E712
|
||||
)
|
||||
)
|
||||
is_owner = ownership_result.scalars().first() is not None
|
||||
|
||||
search_spaces_with_stats.append(
|
||||
SearchSpaceWithStats(
|
||||
id=space.id,
|
||||
name=space.name,
|
||||
description=space.description,
|
||||
created_at=space.created_at,
|
||||
user_id=space.user_id,
|
||||
citations_enabled=space.citations_enabled,
|
||||
qna_custom_instructions=space.qna_custom_instructions,
|
||||
member_count=member_count,
|
||||
is_owner=is_owner,
|
||||
)
|
||||
)
|
||||
|
||||
return search_spaces_with_stats
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=500, detail=f"Failed to fetch search spaces: {e!s}"
|
||||
|
|
@ -97,10 +221,22 @@ async def read_search_space(
|
|||
session: AsyncSession = Depends(get_async_session),
|
||||
user: User = Depends(current_active_user),
|
||||
):
|
||||
"""
|
||||
Get a specific search space by ID.
|
||||
Requires SETTINGS_VIEW permission or membership.
|
||||
"""
|
||||
try:
|
||||
search_space = await check_ownership(
|
||||
session, SearchSpace, search_space_id, user
|
||||
# Check if user has access (is a member)
|
||||
await check_search_space_access(session, user, search_space_id)
|
||||
|
||||
result = await session.execute(
|
||||
select(SearchSpace).filter(SearchSpace.id == search_space_id)
|
||||
)
|
||||
search_space = result.scalars().first()
|
||||
|
||||
if not search_space:
|
||||
raise HTTPException(status_code=404, detail="Search space not found")
|
||||
|
||||
return search_space
|
||||
|
||||
except HTTPException:
|
||||
|
|
@ -118,10 +254,28 @@ async def update_search_space(
|
|||
session: AsyncSession = Depends(get_async_session),
|
||||
user: User = Depends(current_active_user),
|
||||
):
|
||||
"""
|
||||
Update a search space.
|
||||
Requires SETTINGS_UPDATE permission.
|
||||
"""
|
||||
try:
|
||||
db_search_space = await check_ownership(
|
||||
session, SearchSpace, search_space_id, user
|
||||
# Check permission
|
||||
await check_permission(
|
||||
session,
|
||||
user,
|
||||
search_space_id,
|
||||
Permission.SETTINGS_UPDATE.value,
|
||||
"You don't have permission to update this search space",
|
||||
)
|
||||
|
||||
result = await session.execute(
|
||||
select(SearchSpace).filter(SearchSpace.id == search_space_id)
|
||||
)
|
||||
db_search_space = result.scalars().first()
|
||||
|
||||
if not db_search_space:
|
||||
raise HTTPException(status_code=404, detail="Search space not found")
|
||||
|
||||
update_data = search_space_update.model_dump(exclude_unset=True)
|
||||
for key, value in update_data.items():
|
||||
setattr(db_search_space, key, value)
|
||||
|
|
@ -143,10 +297,28 @@ async def delete_search_space(
|
|||
session: AsyncSession = Depends(get_async_session),
|
||||
user: User = Depends(current_active_user),
|
||||
):
|
||||
"""
|
||||
Delete a search space.
|
||||
Requires SETTINGS_DELETE permission (only owners have this by default).
|
||||
"""
|
||||
try:
|
||||
db_search_space = await check_ownership(
|
||||
session, SearchSpace, search_space_id, user
|
||||
# Check permission - only those with SETTINGS_DELETE can delete
|
||||
await check_permission(
|
||||
session,
|
||||
user,
|
||||
search_space_id,
|
||||
Permission.SETTINGS_DELETE.value,
|
||||
"You don't have permission to delete this search space",
|
||||
)
|
||||
|
||||
result = await session.execute(
|
||||
select(SearchSpace).filter(SearchSpace.id == search_space_id)
|
||||
)
|
||||
db_search_space = result.scalars().first()
|
||||
|
||||
if not db_search_space:
|
||||
raise HTTPException(status_code=404, detail="Search space not found")
|
||||
|
||||
await session.delete(db_search_space)
|
||||
await session.commit()
|
||||
return {"message": "Search space deleted successfully"}
|
||||
|
|
|
|||
|
|
@ -27,6 +27,23 @@ from .podcasts import (
|
|||
PodcastRead,
|
||||
PodcastUpdate,
|
||||
)
|
||||
from .rbac_schemas import (
|
||||
InviteAcceptRequest,
|
||||
InviteAcceptResponse,
|
||||
InviteCreate,
|
||||
InviteInfoResponse,
|
||||
InviteRead,
|
||||
InviteUpdate,
|
||||
MembershipRead,
|
||||
MembershipReadWithUser,
|
||||
MembershipUpdate,
|
||||
PermissionInfo,
|
||||
PermissionsListResponse,
|
||||
RoleCreate,
|
||||
RoleRead,
|
||||
RoleUpdate,
|
||||
UserSearchSpaceAccess,
|
||||
)
|
||||
from .search_source_connector import (
|
||||
SearchSourceConnectorBase,
|
||||
SearchSourceConnectorCreate,
|
||||
|
|
@ -38,6 +55,7 @@ from .search_space import (
|
|||
SearchSpaceCreate,
|
||||
SearchSpaceRead,
|
||||
SearchSpaceUpdate,
|
||||
SearchSpaceWithStats,
|
||||
)
|
||||
from .users import UserCreate, UserRead, UserUpdate
|
||||
|
||||
|
|
@ -60,6 +78,13 @@ __all__ = [
|
|||
"ExtensionDocumentContent",
|
||||
"ExtensionDocumentMetadata",
|
||||
"IDModel",
|
||||
# RBAC schemas
|
||||
"InviteAcceptRequest",
|
||||
"InviteAcceptResponse",
|
||||
"InviteCreate",
|
||||
"InviteInfoResponse",
|
||||
"InviteRead",
|
||||
"InviteUpdate",
|
||||
"LLMConfigBase",
|
||||
"LLMConfigCreate",
|
||||
"LLMConfigRead",
|
||||
|
|
@ -69,12 +94,20 @@ __all__ = [
|
|||
"LogFilter",
|
||||
"LogRead",
|
||||
"LogUpdate",
|
||||
"MembershipRead",
|
||||
"MembershipReadWithUser",
|
||||
"MembershipUpdate",
|
||||
"PaginatedResponse",
|
||||
"PermissionInfo",
|
||||
"PermissionsListResponse",
|
||||
"PodcastBase",
|
||||
"PodcastCreate",
|
||||
"PodcastGenerateRequest",
|
||||
"PodcastRead",
|
||||
"PodcastUpdate",
|
||||
"RoleCreate",
|
||||
"RoleRead",
|
||||
"RoleUpdate",
|
||||
"SearchSourceConnectorBase",
|
||||
"SearchSourceConnectorCreate",
|
||||
"SearchSourceConnectorRead",
|
||||
|
|
@ -83,8 +116,10 @@ __all__ = [
|
|||
"SearchSpaceCreate",
|
||||
"SearchSpaceRead",
|
||||
"SearchSpaceUpdate",
|
||||
"SearchSpaceWithStats",
|
||||
"TimestampModel",
|
||||
"UserCreate",
|
||||
"UserRead",
|
||||
"UserSearchSpaceAccess",
|
||||
"UserUpdate",
|
||||
]
|
||||
|
|
|
|||
186
surfsense_backend/app/schemas/rbac_schemas.py
Normal file
186
surfsense_backend/app/schemas/rbac_schemas.py
Normal file
|
|
@ -0,0 +1,186 @@
|
|||
"""
|
||||
Pydantic schemas for RBAC (Role-Based Access Control) endpoints.
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
from uuid import UUID
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
# ============ Role Schemas ============
|
||||
|
||||
|
||||
class RoleBase(BaseModel):
|
||||
"""Base schema for roles."""
|
||||
|
||||
name: str = Field(..., min_length=1, max_length=100)
|
||||
description: str | None = Field(None, max_length=500)
|
||||
permissions: list[str] = Field(default_factory=list)
|
||||
is_default: bool = False
|
||||
|
||||
|
||||
class RoleCreate(RoleBase):
|
||||
"""Schema for creating a new role."""
|
||||
|
||||
pass
|
||||
|
||||
|
||||
class RoleUpdate(BaseModel):
|
||||
"""Schema for updating a role (partial update)."""
|
||||
|
||||
name: str | None = Field(None, min_length=1, max_length=100)
|
||||
description: str | None = Field(None, max_length=500)
|
||||
permissions: list[str] | None = None
|
||||
is_default: bool | None = None
|
||||
|
||||
|
||||
class RoleRead(RoleBase):
|
||||
"""Schema for reading a role."""
|
||||
|
||||
id: int
|
||||
search_space_id: int
|
||||
is_system_role: bool
|
||||
created_at: datetime
|
||||
|
||||
class Config:
|
||||
from_attributes = True
|
||||
|
||||
|
||||
# ============ Membership Schemas ============
|
||||
|
||||
|
||||
class MembershipBase(BaseModel):
|
||||
"""Base schema for memberships."""
|
||||
|
||||
pass
|
||||
|
||||
|
||||
class MembershipUpdate(BaseModel):
|
||||
"""Schema for updating a membership (change role)."""
|
||||
|
||||
role_id: int | None = None
|
||||
|
||||
|
||||
class MembershipRead(BaseModel):
|
||||
"""Schema for reading a membership."""
|
||||
|
||||
id: int
|
||||
user_id: UUID
|
||||
search_space_id: int
|
||||
role_id: int | None
|
||||
is_owner: bool
|
||||
joined_at: datetime
|
||||
created_at: datetime
|
||||
# Nested role info
|
||||
role: RoleRead | None = None
|
||||
# User email (populated separately)
|
||||
user_email: str | None = None
|
||||
|
||||
class Config:
|
||||
from_attributes = True
|
||||
|
||||
|
||||
class MembershipReadWithUser(MembershipRead):
|
||||
"""Schema for reading a membership with user details."""
|
||||
|
||||
user_email: str | None = None
|
||||
user_is_active: bool | None = None
|
||||
|
||||
|
||||
# ============ Invite Schemas ============
|
||||
|
||||
|
||||
class InviteBase(BaseModel):
|
||||
"""Base schema for invites."""
|
||||
|
||||
name: str | None = Field(None, max_length=100)
|
||||
role_id: int | None = None
|
||||
expires_at: datetime | None = None
|
||||
max_uses: int | None = Field(None, ge=1)
|
||||
|
||||
|
||||
class InviteCreate(InviteBase):
|
||||
"""Schema for creating a new invite."""
|
||||
|
||||
pass
|
||||
|
||||
|
||||
class InviteUpdate(BaseModel):
|
||||
"""Schema for updating an invite (partial update)."""
|
||||
|
||||
name: str | None = Field(None, max_length=100)
|
||||
role_id: int | None = None
|
||||
expires_at: datetime | None = None
|
||||
max_uses: int | None = Field(None, ge=1)
|
||||
is_active: bool | None = None
|
||||
|
||||
|
||||
class InviteRead(InviteBase):
|
||||
"""Schema for reading an invite."""
|
||||
|
||||
id: int
|
||||
invite_code: str
|
||||
search_space_id: int
|
||||
created_by_id: UUID | None
|
||||
uses_count: int
|
||||
is_active: bool
|
||||
created_at: datetime
|
||||
# Nested role info
|
||||
role: RoleRead | None = None
|
||||
|
||||
class Config:
|
||||
from_attributes = True
|
||||
|
||||
|
||||
class InviteAcceptRequest(BaseModel):
|
||||
"""Schema for accepting an invite."""
|
||||
|
||||
invite_code: str = Field(..., min_length=1)
|
||||
|
||||
|
||||
class InviteAcceptResponse(BaseModel):
|
||||
"""Response schema for accepting an invite."""
|
||||
|
||||
message: str
|
||||
search_space_id: int
|
||||
search_space_name: str
|
||||
role_name: str | None
|
||||
|
||||
|
||||
class InviteInfoResponse(BaseModel):
|
||||
"""Response schema for getting invite info (public endpoint)."""
|
||||
|
||||
search_space_name: str
|
||||
role_name: str | None
|
||||
is_valid: bool
|
||||
message: str | None = None
|
||||
|
||||
|
||||
# ============ Permission Schemas ============
|
||||
|
||||
|
||||
class PermissionInfo(BaseModel):
|
||||
"""Schema for permission information."""
|
||||
|
||||
value: str
|
||||
name: str
|
||||
category: str
|
||||
|
||||
|
||||
class PermissionsListResponse(BaseModel):
|
||||
"""Response schema for listing all available permissions."""
|
||||
|
||||
permissions: list[PermissionInfo]
|
||||
|
||||
|
||||
# ============ User Access Info ============
|
||||
|
||||
|
||||
class UserSearchSpaceAccess(BaseModel):
|
||||
"""Schema for user's access info in a search space."""
|
||||
|
||||
search_space_id: int
|
||||
search_space_name: str
|
||||
is_owner: bool
|
||||
role_name: str | None
|
||||
permissions: list[str]
|
||||
|
|
@ -34,3 +34,10 @@ class SearchSpaceRead(SearchSpaceBase, IDModel, TimestampModel):
|
|||
qna_custom_instructions: str | None = None
|
||||
|
||||
model_config = ConfigDict(from_attributes=True)
|
||||
|
||||
|
||||
class SearchSpaceWithStats(SearchSpaceRead):
|
||||
"""Extended search space info with member count and ownership status."""
|
||||
|
||||
member_count: int = 1
|
||||
is_owner: bool = False
|
||||
|
|
|
|||
|
|
@ -15,18 +15,17 @@ from app.db import (
|
|||
Document,
|
||||
SearchSourceConnector,
|
||||
SearchSourceConnectorType,
|
||||
SearchSpace,
|
||||
)
|
||||
from app.retriver.chunks_hybrid_search import ChucksHybridSearchRetriever
|
||||
from app.retriver.documents_hybrid_search import DocumentHybridSearchRetriever
|
||||
|
||||
|
||||
class ConnectorService:
|
||||
def __init__(self, session: AsyncSession, user_id: str | None = None):
|
||||
def __init__(self, session: AsyncSession, search_space_id: int | None = None):
|
||||
self.session = session
|
||||
self.chunk_retriever = ChucksHybridSearchRetriever(session)
|
||||
self.document_retriever = DocumentHybridSearchRetriever(session)
|
||||
self.user_id = user_id
|
||||
self.search_space_id = search_space_id
|
||||
self.source_id_counter = (
|
||||
100000 # High starting value to avoid collisions with existing IDs
|
||||
)
|
||||
|
|
@ -36,23 +35,22 @@ class ConnectorService:
|
|||
|
||||
async def initialize_counter(self):
|
||||
"""
|
||||
Initialize the source_id_counter based on the total number of chunks for the user.
|
||||
Initialize the source_id_counter based on the total number of chunks for the search space.
|
||||
This ensures unique IDs across different sessions.
|
||||
"""
|
||||
if self.user_id:
|
||||
if self.search_space_id:
|
||||
try:
|
||||
# Count total chunks for documents belonging to this user
|
||||
# Count total chunks for documents belonging to this search space
|
||||
|
||||
result = await self.session.execute(
|
||||
select(func.count(Chunk.id))
|
||||
.join(Document)
|
||||
.join(SearchSpace)
|
||||
.filter(SearchSpace.user_id == self.user_id)
|
||||
.filter(Document.search_space_id == self.search_space_id)
|
||||
)
|
||||
chunk_count = result.scalar() or 0
|
||||
self.source_id_counter = chunk_count + 1
|
||||
print(
|
||||
f"Initialized source_id_counter to {self.source_id_counter} for user {self.user_id}"
|
||||
f"Initialized source_id_counter to {self.source_id_counter} for search space {self.search_space_id}"
|
||||
)
|
||||
except Exception as e:
|
||||
print(f"Error initializing source_id_counter: {e!s}")
|
||||
|
|
@ -62,7 +60,6 @@ class ConnectorService:
|
|||
async def search_crawled_urls(
|
||||
self,
|
||||
user_query: str,
|
||||
user_id: str,
|
||||
search_space_id: int,
|
||||
top_k: int = 20,
|
||||
search_mode: SearchMode = SearchMode.CHUNKS,
|
||||
|
|
@ -70,6 +67,12 @@ class ConnectorService:
|
|||
"""
|
||||
Search for crawled URLs and return both the source information and langchain documents
|
||||
|
||||
Args:
|
||||
user_query: The user's query
|
||||
search_space_id: The search space ID to search in
|
||||
top_k: Maximum number of results to return
|
||||
search_mode: Search mode (CHUNKS or DOCUMENTS)
|
||||
|
||||
Returns:
|
||||
tuple: (sources_info, langchain_documents)
|
||||
"""
|
||||
|
|
@ -77,7 +80,6 @@ class ConnectorService:
|
|||
crawled_urls_chunks = await self.chunk_retriever.hybrid_search(
|
||||
query_text=user_query,
|
||||
top_k=top_k,
|
||||
user_id=user_id,
|
||||
search_space_id=search_space_id,
|
||||
document_type="CRAWLED_URL",
|
||||
)
|
||||
|
|
@ -85,7 +87,6 @@ class ConnectorService:
|
|||
crawled_urls_chunks = await self.document_retriever.hybrid_search(
|
||||
query_text=user_query,
|
||||
top_k=top_k,
|
||||
user_id=user_id,
|
||||
search_space_id=search_space_id,
|
||||
document_type="CRAWLED_URL",
|
||||
)
|
||||
|
|
@ -109,15 +110,43 @@ class ConnectorService:
|
|||
document = chunk.get("document", {})
|
||||
metadata = document.get("metadata", {})
|
||||
|
||||
# Create a source entry
|
||||
# Extract webcrawler-specific metadata
|
||||
url = metadata.get("source", metadata.get("url", ""))
|
||||
title = document.get(
|
||||
"title", metadata.get("title", "Untitled Document")
|
||||
)
|
||||
description = metadata.get("description", "")
|
||||
language = metadata.get("language", "")
|
||||
last_crawled_at = metadata.get("last_crawled_at", "")
|
||||
|
||||
# Build description with crawler info
|
||||
content_preview = chunk.get("content", "")
|
||||
if not description and content_preview:
|
||||
# Use content preview if no description
|
||||
description = content_preview[:200]
|
||||
if len(content_preview) > 200:
|
||||
description += "..."
|
||||
|
||||
# Add crawler metadata to description if available
|
||||
info_parts = []
|
||||
if language:
|
||||
info_parts.append(f"Language: {language}")
|
||||
if last_crawled_at:
|
||||
info_parts.append(f"Last crawled: {last_crawled_at}")
|
||||
|
||||
if info_parts:
|
||||
if description:
|
||||
description += f" | {' | '.join(info_parts)}"
|
||||
else:
|
||||
description = " | ".join(info_parts)
|
||||
|
||||
source = {
|
||||
"id": chunk.get("chunk_id", self.source_id_counter),
|
||||
"title": document.get("title", "Untitled Document"),
|
||||
"description": metadata.get(
|
||||
"og:description",
|
||||
metadata.get("ogDescription", chunk.get("content", "")),
|
||||
),
|
||||
"url": metadata.get("url", ""),
|
||||
"title": title,
|
||||
"description": description,
|
||||
"url": url,
|
||||
"language": language,
|
||||
"last_crawled_at": last_crawled_at,
|
||||
}
|
||||
|
||||
self.source_id_counter += 1
|
||||
|
|
@ -136,7 +165,6 @@ class ConnectorService:
|
|||
async def search_files(
|
||||
self,
|
||||
user_query: str,
|
||||
user_id: str,
|
||||
search_space_id: int,
|
||||
top_k: int = 20,
|
||||
search_mode: SearchMode = SearchMode.CHUNKS,
|
||||
|
|
@ -151,7 +179,6 @@ class ConnectorService:
|
|||
files_chunks = await self.chunk_retriever.hybrid_search(
|
||||
query_text=user_query,
|
||||
top_k=top_k,
|
||||
user_id=user_id,
|
||||
search_space_id=search_space_id,
|
||||
document_type="FILE",
|
||||
)
|
||||
|
|
@ -159,7 +186,6 @@ class ConnectorService:
|
|||
files_chunks = await self.document_retriever.hybrid_search(
|
||||
query_text=user_query,
|
||||
top_k=top_k,
|
||||
user_id=user_id,
|
||||
search_space_id=search_space_id,
|
||||
document_type="FILE",
|
||||
)
|
||||
|
|
@ -239,43 +265,35 @@ class ConnectorService:
|
|||
|
||||
async def get_connector_by_type(
|
||||
self,
|
||||
user_id: str,
|
||||
connector_type: SearchSourceConnectorType,
|
||||
search_space_id: int | None = None,
|
||||
search_space_id: int,
|
||||
) -> SearchSourceConnector | None:
|
||||
"""
|
||||
Get a connector by type for a specific user and optionally a search space
|
||||
Get a connector by type for a specific search space
|
||||
|
||||
Args:
|
||||
user_id: The user's ID
|
||||
connector_type: The connector type to retrieve
|
||||
search_space_id: Optional search space ID to filter by
|
||||
search_space_id: The search space ID to filter by
|
||||
|
||||
Returns:
|
||||
Optional[SearchSourceConnector]: The connector if found, None otherwise
|
||||
"""
|
||||
query = select(SearchSourceConnector).filter(
|
||||
SearchSourceConnector.user_id == user_id,
|
||||
SearchSourceConnector.search_space_id == search_space_id,
|
||||
SearchSourceConnector.connector_type == connector_type,
|
||||
)
|
||||
|
||||
if search_space_id is not None:
|
||||
query = query.filter(
|
||||
SearchSourceConnector.search_space_id == search_space_id
|
||||
)
|
||||
|
||||
result = await self.session.execute(query)
|
||||
return result.scalars().first()
|
||||
|
||||
async def search_tavily(
|
||||
self, user_query: str, user_id: str, search_space_id: int, top_k: int = 20
|
||||
self, user_query: str, search_space_id: int, top_k: int = 20
|
||||
) -> tuple:
|
||||
"""
|
||||
Search using Tavily API and return both the source information and documents
|
||||
|
||||
Args:
|
||||
user_query: The user's query
|
||||
user_id: The user's ID
|
||||
search_space_id: The search space ID
|
||||
top_k: Maximum number of results to return
|
||||
|
||||
|
|
@ -284,7 +302,7 @@ class ConnectorService:
|
|||
"""
|
||||
# Get Tavily connector configuration
|
||||
tavily_connector = await self.get_connector_by_type(
|
||||
user_id, SearchSourceConnectorType.TAVILY_API, search_space_id
|
||||
SearchSourceConnectorType.TAVILY_API, search_space_id
|
||||
)
|
||||
|
||||
if not tavily_connector:
|
||||
|
|
@ -377,7 +395,6 @@ class ConnectorService:
|
|||
async def search_searxng(
|
||||
self,
|
||||
user_query: str,
|
||||
user_id: str,
|
||||
search_space_id: int,
|
||||
top_k: int = 20,
|
||||
) -> tuple:
|
||||
|
|
@ -385,7 +402,7 @@ class ConnectorService:
|
|||
Search using a configured SearxNG instance and return both sources and documents.
|
||||
"""
|
||||
searx_connector = await self.get_connector_by_type(
|
||||
user_id, SearchSourceConnectorType.SEARXNG_API, search_space_id
|
||||
SearchSourceConnectorType.SEARXNG_API, search_space_id
|
||||
)
|
||||
|
||||
if not searx_connector:
|
||||
|
|
@ -563,7 +580,6 @@ class ConnectorService:
|
|||
async def search_baidu(
|
||||
self,
|
||||
user_query: str,
|
||||
user_id: str,
|
||||
search_space_id: int,
|
||||
top_k: int = 20,
|
||||
) -> tuple:
|
||||
|
|
@ -575,7 +591,6 @@ class ConnectorService:
|
|||
|
||||
Args:
|
||||
user_query: User's search query
|
||||
user_id: User ID
|
||||
search_space_id: Search space ID
|
||||
top_k: Maximum number of results to return
|
||||
|
||||
|
|
@ -584,7 +599,7 @@ class ConnectorService:
|
|||
"""
|
||||
# Get Baidu connector configuration
|
||||
baidu_connector = await self.get_connector_by_type(
|
||||
user_id, SearchSourceConnectorType.BAIDU_SEARCH_API, search_space_id
|
||||
SearchSourceConnectorType.BAIDU_SEARCH_API, search_space_id
|
||||
)
|
||||
|
||||
if not baidu_connector:
|
||||
|
|
@ -789,7 +804,6 @@ class ConnectorService:
|
|||
async def search_slack(
|
||||
self,
|
||||
user_query: str,
|
||||
user_id: str,
|
||||
search_space_id: int,
|
||||
top_k: int = 20,
|
||||
search_mode: SearchMode = SearchMode.CHUNKS,
|
||||
|
|
@ -804,7 +818,6 @@ class ConnectorService:
|
|||
slack_chunks = await self.chunk_retriever.hybrid_search(
|
||||
query_text=user_query,
|
||||
top_k=top_k,
|
||||
user_id=user_id,
|
||||
search_space_id=search_space_id,
|
||||
document_type="SLACK_CONNECTOR",
|
||||
)
|
||||
|
|
@ -812,7 +825,6 @@ class ConnectorService:
|
|||
slack_chunks = await self.document_retriever.hybrid_search(
|
||||
query_text=user_query,
|
||||
top_k=top_k,
|
||||
user_id=user_id,
|
||||
search_space_id=search_space_id,
|
||||
document_type="SLACK_CONNECTOR",
|
||||
)
|
||||
|
|
@ -877,7 +889,6 @@ class ConnectorService:
|
|||
async def search_notion(
|
||||
self,
|
||||
user_query: str,
|
||||
user_id: str,
|
||||
search_space_id: int,
|
||||
top_k: int = 20,
|
||||
search_mode: SearchMode = SearchMode.CHUNKS,
|
||||
|
|
@ -887,7 +898,6 @@ class ConnectorService:
|
|||
|
||||
Args:
|
||||
user_query: The user's query
|
||||
user_id: The user's ID
|
||||
search_space_id: The search space ID to search in
|
||||
top_k: Maximum number of results to return
|
||||
|
||||
|
|
@ -898,7 +908,6 @@ class ConnectorService:
|
|||
notion_chunks = await self.chunk_retriever.hybrid_search(
|
||||
query_text=user_query,
|
||||
top_k=top_k,
|
||||
user_id=user_id,
|
||||
search_space_id=search_space_id,
|
||||
document_type="NOTION_CONNECTOR",
|
||||
)
|
||||
|
|
@ -906,7 +915,6 @@ class ConnectorService:
|
|||
notion_chunks = await self.document_retriever.hybrid_search(
|
||||
query_text=user_query,
|
||||
top_k=top_k,
|
||||
user_id=user_id,
|
||||
search_space_id=search_space_id,
|
||||
document_type="NOTION_CONNECTOR",
|
||||
)
|
||||
|
|
@ -974,7 +982,6 @@ class ConnectorService:
|
|||
async def search_extension(
|
||||
self,
|
||||
user_query: str,
|
||||
user_id: str,
|
||||
search_space_id: int,
|
||||
top_k: int = 20,
|
||||
search_mode: SearchMode = SearchMode.CHUNKS,
|
||||
|
|
@ -984,7 +991,6 @@ class ConnectorService:
|
|||
|
||||
Args:
|
||||
user_query: The user's query
|
||||
user_id: The user's ID
|
||||
search_space_id: The search space ID to search in
|
||||
top_k: Maximum number of results to return
|
||||
|
||||
|
|
@ -995,7 +1001,6 @@ class ConnectorService:
|
|||
extension_chunks = await self.chunk_retriever.hybrid_search(
|
||||
query_text=user_query,
|
||||
top_k=top_k,
|
||||
user_id=user_id,
|
||||
search_space_id=search_space_id,
|
||||
document_type="EXTENSION",
|
||||
)
|
||||
|
|
@ -1003,7 +1008,6 @@ class ConnectorService:
|
|||
extension_chunks = await self.document_retriever.hybrid_search(
|
||||
query_text=user_query,
|
||||
top_k=top_k,
|
||||
user_id=user_id,
|
||||
search_space_id=search_space_id,
|
||||
document_type="EXTENSION",
|
||||
)
|
||||
|
|
@ -1095,7 +1099,6 @@ class ConnectorService:
|
|||
async def search_youtube(
|
||||
self,
|
||||
user_query: str,
|
||||
user_id: str,
|
||||
search_space_id: int,
|
||||
top_k: int = 20,
|
||||
search_mode: SearchMode = SearchMode.CHUNKS,
|
||||
|
|
@ -1105,7 +1108,6 @@ class ConnectorService:
|
|||
|
||||
Args:
|
||||
user_query: The user's query
|
||||
user_id: The user's ID
|
||||
search_space_id: The search space ID to search in
|
||||
top_k: Maximum number of results to return
|
||||
|
||||
|
|
@ -1116,7 +1118,6 @@ class ConnectorService:
|
|||
youtube_chunks = await self.chunk_retriever.hybrid_search(
|
||||
query_text=user_query,
|
||||
top_k=top_k,
|
||||
user_id=user_id,
|
||||
search_space_id=search_space_id,
|
||||
document_type="YOUTUBE_VIDEO",
|
||||
)
|
||||
|
|
@ -1124,7 +1125,6 @@ class ConnectorService:
|
|||
youtube_chunks = await self.document_retriever.hybrid_search(
|
||||
query_text=user_query,
|
||||
top_k=top_k,
|
||||
user_id=user_id,
|
||||
search_space_id=search_space_id,
|
||||
document_type="YOUTUBE_VIDEO",
|
||||
)
|
||||
|
|
@ -1192,7 +1192,6 @@ class ConnectorService:
|
|||
async def search_github(
|
||||
self,
|
||||
user_query: str,
|
||||
user_id: int,
|
||||
search_space_id: int,
|
||||
top_k: int = 20,
|
||||
search_mode: SearchMode = SearchMode.CHUNKS,
|
||||
|
|
@ -1207,7 +1206,6 @@ class ConnectorService:
|
|||
github_chunks = await self.chunk_retriever.hybrid_search(
|
||||
query_text=user_query,
|
||||
top_k=top_k,
|
||||
user_id=user_id,
|
||||
search_space_id=search_space_id,
|
||||
document_type="GITHUB_CONNECTOR",
|
||||
)
|
||||
|
|
@ -1215,7 +1213,6 @@ class ConnectorService:
|
|||
github_chunks = await self.document_retriever.hybrid_search(
|
||||
query_text=user_query,
|
||||
top_k=top_k,
|
||||
user_id=user_id,
|
||||
search_space_id=search_space_id,
|
||||
document_type="GITHUB_CONNECTOR",
|
||||
)
|
||||
|
|
@ -1267,7 +1264,6 @@ class ConnectorService:
|
|||
async def search_linear(
|
||||
self,
|
||||
user_query: str,
|
||||
user_id: str,
|
||||
search_space_id: int,
|
||||
top_k: int = 20,
|
||||
search_mode: SearchMode = SearchMode.CHUNKS,
|
||||
|
|
@ -1277,7 +1273,6 @@ class ConnectorService:
|
|||
|
||||
Args:
|
||||
user_query: The user's query
|
||||
user_id: The user's ID
|
||||
search_space_id: The search space ID to search in
|
||||
top_k: Maximum number of results to return
|
||||
|
||||
|
|
@ -1288,7 +1283,6 @@ class ConnectorService:
|
|||
linear_chunks = await self.chunk_retriever.hybrid_search(
|
||||
query_text=user_query,
|
||||
top_k=top_k,
|
||||
user_id=user_id,
|
||||
search_space_id=search_space_id,
|
||||
document_type="LINEAR_CONNECTOR",
|
||||
)
|
||||
|
|
@ -1296,7 +1290,6 @@ class ConnectorService:
|
|||
linear_chunks = await self.document_retriever.hybrid_search(
|
||||
query_text=user_query,
|
||||
top_k=top_k,
|
||||
user_id=user_id,
|
||||
search_space_id=search_space_id,
|
||||
document_type="LINEAR_CONNECTOR",
|
||||
)
|
||||
|
|
@ -1376,7 +1369,6 @@ class ConnectorService:
|
|||
async def search_jira(
|
||||
self,
|
||||
user_query: str,
|
||||
user_id: str,
|
||||
search_space_id: int,
|
||||
top_k: int = 20,
|
||||
search_mode: SearchMode = SearchMode.CHUNKS,
|
||||
|
|
@ -1386,7 +1378,6 @@ class ConnectorService:
|
|||
|
||||
Args:
|
||||
user_query: The user's query
|
||||
user_id: The user's ID
|
||||
search_space_id: The search space ID to search in
|
||||
top_k: Maximum number of results to return
|
||||
search_mode: Search mode (CHUNKS or DOCUMENTS)
|
||||
|
|
@ -1398,7 +1389,6 @@ class ConnectorService:
|
|||
jira_chunks = await self.chunk_retriever.hybrid_search(
|
||||
query_text=user_query,
|
||||
top_k=top_k,
|
||||
user_id=user_id,
|
||||
search_space_id=search_space_id,
|
||||
document_type="JIRA_CONNECTOR",
|
||||
)
|
||||
|
|
@ -1406,7 +1396,6 @@ class ConnectorService:
|
|||
jira_chunks = await self.document_retriever.hybrid_search(
|
||||
query_text=user_query,
|
||||
top_k=top_k,
|
||||
user_id=user_id,
|
||||
search_space_id=search_space_id,
|
||||
document_type="JIRA_CONNECTOR",
|
||||
)
|
||||
|
|
@ -1497,7 +1486,6 @@ class ConnectorService:
|
|||
async def search_google_calendar(
|
||||
self,
|
||||
user_query: str,
|
||||
user_id: str,
|
||||
search_space_id: int,
|
||||
top_k: int = 20,
|
||||
search_mode: SearchMode = SearchMode.CHUNKS,
|
||||
|
|
@ -1507,7 +1495,6 @@ class ConnectorService:
|
|||
|
||||
Args:
|
||||
user_query: The user's query
|
||||
user_id: The user's ID
|
||||
search_space_id: The search space ID to search in
|
||||
top_k: Maximum number of results to return
|
||||
search_mode: Search mode (CHUNKS or DOCUMENTS)
|
||||
|
|
@ -1519,7 +1506,6 @@ class ConnectorService:
|
|||
calendar_chunks = await self.chunk_retriever.hybrid_search(
|
||||
query_text=user_query,
|
||||
top_k=top_k,
|
||||
user_id=user_id,
|
||||
search_space_id=search_space_id,
|
||||
document_type="GOOGLE_CALENDAR_CONNECTOR",
|
||||
)
|
||||
|
|
@ -1527,7 +1513,6 @@ class ConnectorService:
|
|||
calendar_chunks = await self.document_retriever.hybrid_search(
|
||||
query_text=user_query,
|
||||
top_k=top_k,
|
||||
user_id=user_id,
|
||||
search_space_id=search_space_id,
|
||||
document_type="GOOGLE_CALENDAR_CONNECTOR",
|
||||
)
|
||||
|
|
@ -1630,7 +1615,6 @@ class ConnectorService:
|
|||
async def search_airtable(
|
||||
self,
|
||||
user_query: str,
|
||||
user_id: str,
|
||||
search_space_id: int,
|
||||
top_k: int = 20,
|
||||
search_mode: SearchMode = SearchMode.CHUNKS,
|
||||
|
|
@ -1640,7 +1624,6 @@ class ConnectorService:
|
|||
|
||||
Args:
|
||||
user_query: The user's query
|
||||
user_id: The user's ID
|
||||
search_space_id: The search space ID to search in
|
||||
top_k: Maximum number of results to return
|
||||
search_mode: Search mode (CHUNKS or DOCUMENTS)
|
||||
|
|
@ -1652,7 +1635,6 @@ class ConnectorService:
|
|||
airtable_chunks = await self.chunk_retriever.hybrid_search(
|
||||
query_text=user_query,
|
||||
top_k=top_k,
|
||||
user_id=user_id,
|
||||
search_space_id=search_space_id,
|
||||
document_type="AIRTABLE_CONNECTOR",
|
||||
)
|
||||
|
|
@ -1660,7 +1642,6 @@ class ConnectorService:
|
|||
airtable_chunks = await self.document_retriever.hybrid_search(
|
||||
query_text=user_query,
|
||||
top_k=top_k,
|
||||
user_id=user_id,
|
||||
search_space_id=search_space_id,
|
||||
document_type="AIRTABLE_CONNECTOR",
|
||||
)
|
||||
|
|
@ -1718,7 +1699,6 @@ class ConnectorService:
|
|||
async def search_google_gmail(
|
||||
self,
|
||||
user_query: str,
|
||||
user_id: str,
|
||||
search_space_id: int,
|
||||
top_k: int = 20,
|
||||
search_mode: SearchMode = SearchMode.CHUNKS,
|
||||
|
|
@ -1728,7 +1708,6 @@ class ConnectorService:
|
|||
|
||||
Args:
|
||||
user_query: The user's query
|
||||
user_id: The user's ID
|
||||
search_space_id: The search space ID to search in
|
||||
top_k: Maximum number of results to return
|
||||
search_mode: Search mode (CHUNKS or DOCUMENTS)
|
||||
|
|
@ -1740,7 +1719,6 @@ class ConnectorService:
|
|||
gmail_chunks = await self.chunk_retriever.hybrid_search(
|
||||
query_text=user_query,
|
||||
top_k=top_k,
|
||||
user_id=user_id,
|
||||
search_space_id=search_space_id,
|
||||
document_type="GOOGLE_GMAIL_CONNECTOR",
|
||||
)
|
||||
|
|
@ -1748,7 +1726,6 @@ class ConnectorService:
|
|||
gmail_chunks = await self.document_retriever.hybrid_search(
|
||||
query_text=user_query,
|
||||
top_k=top_k,
|
||||
user_id=user_id,
|
||||
search_space_id=search_space_id,
|
||||
document_type="GOOGLE_GMAIL_CONNECTOR",
|
||||
)
|
||||
|
|
@ -1842,7 +1819,6 @@ class ConnectorService:
|
|||
async def search_confluence(
|
||||
self,
|
||||
user_query: str,
|
||||
user_id: str,
|
||||
search_space_id: int,
|
||||
top_k: int = 20,
|
||||
search_mode: SearchMode = SearchMode.CHUNKS,
|
||||
|
|
@ -1852,7 +1828,6 @@ class ConnectorService:
|
|||
|
||||
Args:
|
||||
user_query: The user's query
|
||||
user_id: The user's ID
|
||||
search_space_id: The search space ID to search in
|
||||
top_k: Maximum number of results to return
|
||||
search_mode: Search mode (CHUNKS or DOCUMENTS)
|
||||
|
|
@ -1864,7 +1839,6 @@ class ConnectorService:
|
|||
confluence_chunks = await self.chunk_retriever.hybrid_search(
|
||||
query_text=user_query,
|
||||
top_k=top_k,
|
||||
user_id=user_id,
|
||||
search_space_id=search_space_id,
|
||||
document_type="CONFLUENCE_CONNECTOR",
|
||||
)
|
||||
|
|
@ -1872,7 +1846,6 @@ class ConnectorService:
|
|||
confluence_chunks = await self.document_retriever.hybrid_search(
|
||||
query_text=user_query,
|
||||
top_k=top_k,
|
||||
user_id=user_id,
|
||||
search_space_id=search_space_id,
|
||||
document_type="CONFLUENCE_CONNECTOR",
|
||||
)
|
||||
|
|
@ -1937,7 +1910,6 @@ class ConnectorService:
|
|||
async def search_clickup(
|
||||
self,
|
||||
user_query: str,
|
||||
user_id: str,
|
||||
search_space_id: int,
|
||||
top_k: int = 20,
|
||||
search_mode: SearchMode = SearchMode.CHUNKS,
|
||||
|
|
@ -1947,7 +1919,6 @@ class ConnectorService:
|
|||
|
||||
Args:
|
||||
user_query: The user's query
|
||||
user_id: The user's ID
|
||||
search_space_id: The search space ID to search in
|
||||
top_k: Maximum number of results to return
|
||||
search_mode: Search mode (CHUNKS or DOCUMENTS)
|
||||
|
|
@ -1959,7 +1930,6 @@ class ConnectorService:
|
|||
clickup_chunks = await self.chunk_retriever.hybrid_search(
|
||||
query_text=user_query,
|
||||
top_k=top_k,
|
||||
user_id=user_id,
|
||||
search_space_id=search_space_id,
|
||||
document_type="CLICKUP_CONNECTOR",
|
||||
)
|
||||
|
|
@ -1967,7 +1937,6 @@ class ConnectorService:
|
|||
clickup_chunks = await self.document_retriever.hybrid_search(
|
||||
query_text=user_query,
|
||||
top_k=top_k,
|
||||
user_id=user_id,
|
||||
search_space_id=search_space_id,
|
||||
document_type="CLICKUP_CONNECTOR",
|
||||
)
|
||||
|
|
@ -2053,7 +2022,6 @@ class ConnectorService:
|
|||
async def search_linkup(
|
||||
self,
|
||||
user_query: str,
|
||||
user_id: str,
|
||||
search_space_id: int,
|
||||
mode: str = "standard",
|
||||
) -> tuple:
|
||||
|
|
@ -2062,7 +2030,6 @@ class ConnectorService:
|
|||
|
||||
Args:
|
||||
user_query: The user's query
|
||||
user_id: The user's ID
|
||||
search_space_id: The search space ID
|
||||
mode: Search depth mode, can be "standard" or "deep"
|
||||
|
||||
|
|
@ -2071,7 +2038,7 @@ class ConnectorService:
|
|||
"""
|
||||
# Get Linkup connector configuration
|
||||
linkup_connector = await self.get_connector_by_type(
|
||||
user_id, SearchSourceConnectorType.LINKUP_API, search_space_id
|
||||
SearchSourceConnectorType.LINKUP_API, search_space_id
|
||||
)
|
||||
|
||||
if not linkup_connector:
|
||||
|
|
@ -2176,7 +2143,6 @@ class ConnectorService:
|
|||
async def search_discord(
|
||||
self,
|
||||
user_query: str,
|
||||
user_id: str,
|
||||
search_space_id: int,
|
||||
top_k: int = 20,
|
||||
search_mode: SearchMode = SearchMode.CHUNKS,
|
||||
|
|
@ -2186,7 +2152,6 @@ class ConnectorService:
|
|||
|
||||
Args:
|
||||
user_query: The user's query
|
||||
user_id: The user's ID
|
||||
search_space_id: The search space ID to search in
|
||||
top_k: Maximum number of results to return
|
||||
|
||||
|
|
@ -2197,7 +2162,6 @@ class ConnectorService:
|
|||
discord_chunks = await self.chunk_retriever.hybrid_search(
|
||||
query_text=user_query,
|
||||
top_k=top_k,
|
||||
user_id=user_id,
|
||||
search_space_id=search_space_id,
|
||||
document_type="DISCORD_CONNECTOR",
|
||||
)
|
||||
|
|
@ -2205,7 +2169,6 @@ class ConnectorService:
|
|||
discord_chunks = await self.document_retriever.hybrid_search(
|
||||
query_text=user_query,
|
||||
top_k=top_k,
|
||||
user_id=user_id,
|
||||
search_space_id=search_space_id,
|
||||
document_type="DISCORD_CONNECTOR",
|
||||
)
|
||||
|
|
@ -2273,7 +2236,6 @@ class ConnectorService:
|
|||
async def search_luma(
|
||||
self,
|
||||
user_query: str,
|
||||
user_id: str,
|
||||
search_space_id: int,
|
||||
top_k: int = 20,
|
||||
search_mode: SearchMode = SearchMode.CHUNKS,
|
||||
|
|
@ -2283,7 +2245,6 @@ class ConnectorService:
|
|||
|
||||
Args:
|
||||
user_query: The user's query
|
||||
user_id: The user's ID
|
||||
search_space_id: The search space ID to search in
|
||||
top_k: Maximum number of results to return
|
||||
search_mode: Search mode (CHUNKS or DOCUMENTS)
|
||||
|
|
@ -2295,7 +2256,6 @@ class ConnectorService:
|
|||
luma_chunks = await self.chunk_retriever.hybrid_search(
|
||||
query_text=user_query,
|
||||
top_k=top_k,
|
||||
user_id=user_id,
|
||||
search_space_id=search_space_id,
|
||||
document_type="LUMA_CONNECTOR",
|
||||
)
|
||||
|
|
@ -2303,7 +2263,6 @@ class ConnectorService:
|
|||
luma_chunks = await self.document_retriever.hybrid_search(
|
||||
query_text=user_query,
|
||||
top_k=top_k,
|
||||
user_id=user_id,
|
||||
search_space_id=search_space_id,
|
||||
document_type="LUMA_CONNECTOR",
|
||||
)
|
||||
|
|
@ -2431,7 +2390,6 @@ class ConnectorService:
|
|||
async def search_elasticsearch(
|
||||
self,
|
||||
user_query: str,
|
||||
user_id: str,
|
||||
search_space_id: int,
|
||||
top_k: int = 20,
|
||||
search_mode: SearchMode = SearchMode.CHUNKS,
|
||||
|
|
@ -2441,7 +2399,6 @@ class ConnectorService:
|
|||
|
||||
Args:
|
||||
user_query: The user's query
|
||||
user_id: The user's ID
|
||||
search_space_id: The search space ID to search in
|
||||
top_k: Maximum number of results to return
|
||||
search_mode: Search mode (CHUNKS or DOCUMENTS)
|
||||
|
|
@ -2453,7 +2410,6 @@ class ConnectorService:
|
|||
elasticsearch_chunks = await self.chunk_retriever.hybrid_search(
|
||||
query_text=user_query,
|
||||
top_k=top_k,
|
||||
user_id=user_id,
|
||||
search_space_id=search_space_id,
|
||||
document_type="ELASTICSEARCH_CONNECTOR",
|
||||
)
|
||||
|
|
@ -2461,7 +2417,6 @@ class ConnectorService:
|
|||
elasticsearch_chunks = await self.document_retriever.hybrid_search(
|
||||
query_text=user_query,
|
||||
top_k=top_k,
|
||||
user_id=user_id,
|
||||
search_space_id=search_space_id,
|
||||
document_type="ELASTICSEARCH_CONNECTOR",
|
||||
)
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
|
|||
from sqlalchemy.future import select
|
||||
|
||||
from app.config import config
|
||||
from app.db import LLMConfig, UserSearchSpacePreference
|
||||
from app.db import LLMConfig, SearchSpace
|
||||
|
||||
# Configure litellm to automatically drop unsupported parameters
|
||||
litellm.drop_params = True
|
||||
|
|
@ -144,15 +144,16 @@ async def validate_llm_config(
|
|||
return False, error_msg
|
||||
|
||||
|
||||
async def get_user_llm_instance(
|
||||
session: AsyncSession, user_id: str, search_space_id: int, role: str
|
||||
async def get_search_space_llm_instance(
|
||||
session: AsyncSession, search_space_id: int, role: str
|
||||
) -> ChatLiteLLM | None:
|
||||
"""
|
||||
Get a ChatLiteLLM instance for a specific user, search space, and role.
|
||||
Get a ChatLiteLLM instance for a specific search space and role.
|
||||
|
||||
LLM preferences are stored at the search space level and shared by all members.
|
||||
|
||||
Args:
|
||||
session: Database session
|
||||
user_id: User ID
|
||||
search_space_id: Search Space ID
|
||||
role: LLM role ('long_context', 'fast', or 'strategic')
|
||||
|
||||
|
|
@ -160,37 +161,30 @@ async def get_user_llm_instance(
|
|||
ChatLiteLLM instance or None if not found
|
||||
"""
|
||||
try:
|
||||
# Get user's LLM preferences for this search space
|
||||
# Get the search space with its LLM preferences
|
||||
result = await session.execute(
|
||||
select(UserSearchSpacePreference).where(
|
||||
UserSearchSpacePreference.user_id == user_id,
|
||||
UserSearchSpacePreference.search_space_id == search_space_id,
|
||||
)
|
||||
select(SearchSpace).where(SearchSpace.id == search_space_id)
|
||||
)
|
||||
preference = result.scalars().first()
|
||||
search_space = result.scalars().first()
|
||||
|
||||
if not preference:
|
||||
logger.error(
|
||||
f"No LLM preferences found for user {user_id} in search space {search_space_id}"
|
||||
)
|
||||
if not search_space:
|
||||
logger.error(f"Search space {search_space_id} not found")
|
||||
return None
|
||||
|
||||
# Get the appropriate LLM config ID based on role
|
||||
llm_config_id = None
|
||||
if role == LLMRole.LONG_CONTEXT:
|
||||
llm_config_id = preference.long_context_llm_id
|
||||
llm_config_id = search_space.long_context_llm_id
|
||||
elif role == LLMRole.FAST:
|
||||
llm_config_id = preference.fast_llm_id
|
||||
llm_config_id = search_space.fast_llm_id
|
||||
elif role == LLMRole.STRATEGIC:
|
||||
llm_config_id = preference.strategic_llm_id
|
||||
llm_config_id = search_space.strategic_llm_id
|
||||
else:
|
||||
logger.error(f"Invalid LLM role: {role}")
|
||||
return None
|
||||
|
||||
if not llm_config_id:
|
||||
logger.error(
|
||||
f"No {role} LLM configured for user {user_id} in search space {search_space_id}"
|
||||
)
|
||||
logger.error(f"No {role} LLM configured for search space {search_space_id}")
|
||||
return None
|
||||
|
||||
# Check if this is a global config (negative ID)
|
||||
|
|
@ -331,31 +325,63 @@ async def get_user_llm_instance(
|
|||
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f"Error getting LLM instance for user {user_id}, role {role}: {e!s}"
|
||||
f"Error getting LLM instance for search space {search_space_id}, role {role}: {e!s}"
|
||||
)
|
||||
return None
|
||||
|
||||
|
||||
async def get_long_context_llm(
|
||||
session: AsyncSession, search_space_id: int
|
||||
) -> ChatLiteLLM | None:
|
||||
"""Get the search space's long context LLM instance."""
|
||||
return await get_search_space_llm_instance(
|
||||
session, search_space_id, LLMRole.LONG_CONTEXT
|
||||
)
|
||||
|
||||
|
||||
async def get_fast_llm(
|
||||
session: AsyncSession, search_space_id: int
|
||||
) -> ChatLiteLLM | None:
|
||||
"""Get the search space's fast LLM instance."""
|
||||
return await get_search_space_llm_instance(session, search_space_id, LLMRole.FAST)
|
||||
|
||||
|
||||
async def get_strategic_llm(
|
||||
session: AsyncSession, search_space_id: int
|
||||
) -> ChatLiteLLM | None:
|
||||
"""Get the search space's strategic LLM instance."""
|
||||
return await get_search_space_llm_instance(
|
||||
session, search_space_id, LLMRole.STRATEGIC
|
||||
)
|
||||
|
||||
|
||||
# Backward-compatible aliases (deprecated - will be removed in future versions)
|
||||
async def get_user_llm_instance(
|
||||
session: AsyncSession, user_id: str, search_space_id: int, role: str
|
||||
) -> ChatLiteLLM | None:
|
||||
"""
|
||||
Deprecated: Use get_search_space_llm_instance instead.
|
||||
LLM preferences are now stored at the search space level, not per-user.
|
||||
"""
|
||||
return await get_search_space_llm_instance(session, search_space_id, role)
|
||||
|
||||
|
||||
async def get_user_long_context_llm(
|
||||
session: AsyncSession, user_id: str, search_space_id: int
|
||||
) -> ChatLiteLLM | None:
|
||||
"""Get user's long context LLM instance for a specific search space."""
|
||||
return await get_user_llm_instance(
|
||||
session, user_id, search_space_id, LLMRole.LONG_CONTEXT
|
||||
)
|
||||
"""Deprecated: Use get_long_context_llm instead."""
|
||||
return await get_long_context_llm(session, search_space_id)
|
||||
|
||||
|
||||
async def get_user_fast_llm(
|
||||
session: AsyncSession, user_id: str, search_space_id: int
|
||||
) -> ChatLiteLLM | None:
|
||||
"""Get user's fast LLM instance for a specific search space."""
|
||||
return await get_user_llm_instance(session, user_id, search_space_id, LLMRole.FAST)
|
||||
"""Deprecated: Use get_fast_llm instead."""
|
||||
return await get_fast_llm(session, search_space_id)
|
||||
|
||||
|
||||
async def get_user_strategic_llm(
|
||||
session: AsyncSession, user_id: str, search_space_id: int
|
||||
) -> ChatLiteLLM | None:
|
||||
"""Get user's strategic LLM instance for a specific search space."""
|
||||
return await get_user_llm_instance(
|
||||
session, user_id, search_space_id, LLMRole.STRATEGIC
|
||||
)
|
||||
"""Deprecated: Use get_strategic_llm instead."""
|
||||
return await get_strategic_llm(session, search_space_id)
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ from typing import Any
|
|||
from langchain.schema import AIMessage, HumanMessage, SystemMessage
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.services.llm_service import get_user_strategic_llm
|
||||
from app.services.llm_service import get_strategic_llm
|
||||
|
||||
|
||||
class QueryService:
|
||||
|
|
@ -16,19 +16,17 @@ class QueryService:
|
|||
async def reformulate_query_with_chat_history(
|
||||
user_query: str,
|
||||
session: AsyncSession,
|
||||
user_id: str,
|
||||
search_space_id: int,
|
||||
chat_history_str: str | None = None,
|
||||
) -> str:
|
||||
"""
|
||||
Reformulate the user query using the user's strategic LLM to make it more
|
||||
Reformulate the user query using the search space's strategic LLM to make it more
|
||||
effective for information retrieval and research purposes.
|
||||
|
||||
Args:
|
||||
user_query: The original user query
|
||||
session: Database session for accessing user LLM configs
|
||||
user_id: User ID to get their specific LLM configuration
|
||||
search_space_id: Search Space ID to get user's LLM preferences
|
||||
session: Database session for accessing LLM configs
|
||||
search_space_id: Search Space ID to get LLM preferences
|
||||
chat_history_str: Optional chat history string
|
||||
|
||||
Returns:
|
||||
|
|
@ -38,11 +36,11 @@ class QueryService:
|
|||
return user_query
|
||||
|
||||
try:
|
||||
# Get the user's strategic LLM instance
|
||||
llm = await get_user_strategic_llm(session, user_id, search_space_id)
|
||||
# Get the search space's strategic LLM instance
|
||||
llm = await get_strategic_llm(session, search_space_id)
|
||||
if not llm:
|
||||
print(
|
||||
f"Warning: No strategic LLM configured for user {user_id} in search space {search_space_id}. Using original query."
|
||||
f"Warning: No strategic LLM configured for search space {search_space_id}. Using original query."
|
||||
)
|
||||
return user_query
|
||||
|
||||
|
|
|
|||
|
|
@ -600,3 +600,46 @@ async def _index_elasticsearch_documents(
|
|||
await run_elasticsearch_indexing(
|
||||
session, connector_id, search_space_id, user_id, start_date, end_date
|
||||
)
|
||||
|
||||
|
||||
@celery_app.task(name="index_crawled_urls", bind=True)
|
||||
def index_crawled_urls_task(
|
||||
self,
|
||||
connector_id: int,
|
||||
search_space_id: int,
|
||||
user_id: str,
|
||||
start_date: str,
|
||||
end_date: str,
|
||||
):
|
||||
"""Celery task to index Web page Urls."""
|
||||
import asyncio
|
||||
|
||||
loop = asyncio.new_event_loop()
|
||||
asyncio.set_event_loop(loop)
|
||||
|
||||
try:
|
||||
loop.run_until_complete(
|
||||
_index_crawled_urls(
|
||||
connector_id, search_space_id, user_id, start_date, end_date
|
||||
)
|
||||
)
|
||||
finally:
|
||||
loop.close()
|
||||
|
||||
|
||||
async def _index_crawled_urls(
|
||||
connector_id: int,
|
||||
search_space_id: int,
|
||||
user_id: str,
|
||||
start_date: str,
|
||||
end_date: str,
|
||||
):
|
||||
"""Index Web page Urls with new session."""
|
||||
from app.routes.search_source_connectors_routes import (
|
||||
run_web_page_indexing,
|
||||
)
|
||||
|
||||
async with get_celery_session_maker()() as session:
|
||||
await run_web_page_indexing(
|
||||
session, connector_id, search_space_id, user_id, start_date, end_date
|
||||
)
|
||||
|
|
|
|||
|
|
@ -9,7 +9,6 @@ from app.celery_app import celery_app
|
|||
from app.config import config
|
||||
from app.services.task_logging_service import TaskLoggingService
|
||||
from app.tasks.document_processors import (
|
||||
add_crawled_url_document,
|
||||
add_extension_received_document,
|
||||
add_youtube_video_document,
|
||||
)
|
||||
|
|
@ -120,71 +119,6 @@ async def _process_extension_document(
|
|||
raise
|
||||
|
||||
|
||||
@celery_app.task(name="process_crawled_url", bind=True)
|
||||
def process_crawled_url_task(self, url: str, search_space_id: int, user_id: str):
|
||||
"""
|
||||
Celery task to process crawled URL.
|
||||
|
||||
Args:
|
||||
url: URL to crawl and process
|
||||
search_space_id: ID of the search space
|
||||
user_id: ID of the user
|
||||
"""
|
||||
import asyncio
|
||||
|
||||
loop = asyncio.new_event_loop()
|
||||
asyncio.set_event_loop(loop)
|
||||
|
||||
try:
|
||||
loop.run_until_complete(_process_crawled_url(url, search_space_id, user_id))
|
||||
finally:
|
||||
loop.close()
|
||||
|
||||
|
||||
async def _process_crawled_url(url: str, search_space_id: int, user_id: str):
|
||||
"""Process crawled URL with new session."""
|
||||
async with get_celery_session_maker()() as session:
|
||||
task_logger = TaskLoggingService(session, search_space_id)
|
||||
|
||||
log_entry = await task_logger.log_task_start(
|
||||
task_name="process_crawled_url",
|
||||
source="document_processor",
|
||||
message=f"Starting URL crawling and processing for: {url}",
|
||||
metadata={"document_type": "CRAWLED_URL", "url": url, "user_id": user_id},
|
||||
)
|
||||
|
||||
try:
|
||||
result = await add_crawled_url_document(
|
||||
session, url, search_space_id, user_id
|
||||
)
|
||||
|
||||
if result:
|
||||
await task_logger.log_task_success(
|
||||
log_entry,
|
||||
f"Successfully crawled and processed URL: {url}",
|
||||
{
|
||||
"document_id": result.id,
|
||||
"title": result.title,
|
||||
"content_hash": result.content_hash,
|
||||
},
|
||||
)
|
||||
else:
|
||||
await task_logger.log_task_success(
|
||||
log_entry,
|
||||
f"URL document already exists (duplicate): {url}",
|
||||
{"duplicate_detected": True},
|
||||
)
|
||||
except Exception as e:
|
||||
await task_logger.log_task_failure(
|
||||
log_entry,
|
||||
f"Failed to crawl URL: {url}",
|
||||
str(e),
|
||||
{"error_type": type(e).__name__},
|
||||
)
|
||||
logger.error(f"Error processing crawled URL: {e!s}")
|
||||
raise
|
||||
|
||||
|
||||
@celery_app.task(name="process_youtube_video", bind=True)
|
||||
def process_youtube_video_task(self, url: str, search_space_id: int, user_id: str):
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -67,6 +67,7 @@ async def _check_and_trigger_schedules():
|
|||
index_airtable_records_task,
|
||||
index_clickup_tasks_task,
|
||||
index_confluence_pages_task,
|
||||
index_crawled_urls_task,
|
||||
index_discord_messages_task,
|
||||
index_elasticsearch_documents_task,
|
||||
index_github_repos_task,
|
||||
|
|
@ -94,6 +95,7 @@ async def _check_and_trigger_schedules():
|
|||
SearchSourceConnectorType.DISCORD_CONNECTOR: index_discord_messages_task,
|
||||
SearchSourceConnectorType.LUMA_CONNECTOR: index_luma_events_task,
|
||||
SearchSourceConnectorType.ELASTICSEARCH_CONNECTOR: index_elasticsearch_documents_task,
|
||||
SearchSourceConnectorType.WEBCRAWLER_CONNECTOR: index_crawled_urls_task,
|
||||
}
|
||||
|
||||
# Trigger indexing for each due connector
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@ Available indexers:
|
|||
- Google Gmail: Index messages from Google Gmail
|
||||
- Google Calendar: Index events from Google Calendar
|
||||
- Luma: Index events from Luma
|
||||
- Webcrawler: Index crawled URLs
|
||||
- Elasticsearch: Index documents from Elasticsearch instances
|
||||
"""
|
||||
|
||||
|
|
@ -41,6 +42,7 @@ from .luma_indexer import index_luma_events
|
|||
# Documentation and knowledge management
|
||||
from .notion_indexer import index_notion_pages
|
||||
from .slack_indexer import index_slack_messages
|
||||
from .webcrawler_indexer import index_crawled_urls
|
||||
|
||||
__all__ = [ # noqa: RUF022
|
||||
"index_airtable_records",
|
||||
|
|
@ -58,6 +60,7 @@ __all__ = [ # noqa: RUF022
|
|||
"index_linear_issues",
|
||||
# Documentation and knowledge management
|
||||
"index_notion_pages",
|
||||
"index_crawled_urls",
|
||||
# Communication platforms
|
||||
"index_slack_messages",
|
||||
"index_google_gmail_messages",
|
||||
|
|
|
|||
|
|
@ -0,0 +1,450 @@
|
|||
"""
|
||||
Webcrawler connector indexer.
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
from sqlalchemy.exc import SQLAlchemyError
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.config import config
|
||||
from app.connectors.webcrawler_connector import WebCrawlerConnector
|
||||
from app.db import Document, DocumentType, SearchSourceConnectorType
|
||||
from app.services.llm_service import get_user_long_context_llm
|
||||
from app.services.task_logging_service import TaskLoggingService
|
||||
from app.utils.document_converters import (
|
||||
create_document_chunks,
|
||||
generate_content_hash,
|
||||
generate_document_summary,
|
||||
generate_unique_identifier_hash,
|
||||
)
|
||||
|
||||
from .base import (
|
||||
check_document_by_unique_identifier,
|
||||
get_connector_by_id,
|
||||
logger,
|
||||
update_connector_last_indexed,
|
||||
)
|
||||
|
||||
|
||||
async def index_crawled_urls(
|
||||
session: AsyncSession,
|
||||
connector_id: int,
|
||||
search_space_id: int,
|
||||
user_id: str,
|
||||
start_date: str | None = None,
|
||||
end_date: str | None = None,
|
||||
update_last_indexed: bool = True,
|
||||
) -> tuple[int, str | None]:
|
||||
"""
|
||||
Index web page URLs.
|
||||
|
||||
Args:
|
||||
session: Database session
|
||||
connector_id: ID of the webcrawler connector
|
||||
search_space_id: ID of the search space to store documents in
|
||||
user_id: User ID
|
||||
start_date: Start date for filtering (YYYY-MM-DD format) - optional
|
||||
end_date: End date for filtering (YYYY-MM-DD format) - optional
|
||||
update_last_indexed: Whether to update the last_indexed_at timestamp (default: True)
|
||||
|
||||
Returns:
|
||||
Tuple containing (number of documents indexed, error message or None)
|
||||
"""
|
||||
task_logger = TaskLoggingService(session, search_space_id)
|
||||
|
||||
# Log task start
|
||||
log_entry = await task_logger.log_task_start(
|
||||
task_name="crawled_url_indexing",
|
||||
source="connector_indexing_task",
|
||||
message=f"Starting web page URL indexing for connector {connector_id}",
|
||||
metadata={
|
||||
"connector_id": connector_id,
|
||||
"user_id": str(user_id),
|
||||
"start_date": start_date,
|
||||
"end_date": end_date,
|
||||
},
|
||||
)
|
||||
|
||||
try:
|
||||
# Get the connector
|
||||
await task_logger.log_task_progress(
|
||||
log_entry,
|
||||
f"Retrieving webcrawler connector {connector_id} from database",
|
||||
{"stage": "connector_retrieval"},
|
||||
)
|
||||
|
||||
# Get the connector from the database
|
||||
connector = await get_connector_by_id(
|
||||
session, connector_id, SearchSourceConnectorType.WEBCRAWLER_CONNECTOR
|
||||
)
|
||||
|
||||
if not connector:
|
||||
await task_logger.log_task_failure(
|
||||
log_entry,
|
||||
f"Connector with ID {connector_id} not found or is not a webcrawler connector",
|
||||
"Connector not found",
|
||||
{"error_type": "ConnectorNotFound"},
|
||||
)
|
||||
return (
|
||||
0,
|
||||
f"Connector with ID {connector_id} not found or is not a webcrawler connector",
|
||||
)
|
||||
|
||||
# Get the Firecrawl API key from the connector config (optional)
|
||||
api_key = connector.config.get("FIRECRAWL_API_KEY")
|
||||
|
||||
# Get URLs from connector config
|
||||
initial_urls = connector.config.get("INITIAL_URLS", "")
|
||||
if isinstance(initial_urls, str):
|
||||
urls = [url.strip() for url in initial_urls.split("\n") if url.strip()]
|
||||
elif isinstance(initial_urls, list):
|
||||
urls = [url.strip() for url in initial_urls if url.strip()]
|
||||
else:
|
||||
urls = []
|
||||
|
||||
logger.info(
|
||||
f"Starting crawled web page indexing for connector {connector_id} with {len(urls)} URLs"
|
||||
)
|
||||
|
||||
# Initialize webcrawler client
|
||||
await task_logger.log_task_progress(
|
||||
log_entry,
|
||||
f"Initializing webcrawler client for connector {connector_id}",
|
||||
{
|
||||
"stage": "client_initialization",
|
||||
"use_firecrawl": bool(api_key),
|
||||
},
|
||||
)
|
||||
|
||||
crawler = WebCrawlerConnector(firecrawl_api_key=api_key)
|
||||
|
||||
# Validate URLs
|
||||
if not urls:
|
||||
await task_logger.log_task_failure(
|
||||
log_entry,
|
||||
"No URLs provided for indexing",
|
||||
"Empty URL list",
|
||||
{"error_type": "ValidationError"},
|
||||
)
|
||||
return 0, "No URLs provided for indexing"
|
||||
|
||||
await task_logger.log_task_progress(
|
||||
log_entry,
|
||||
f"Starting to crawl {len(urls)} URLs",
|
||||
{
|
||||
"stage": "crawling",
|
||||
"total_urls": len(urls),
|
||||
},
|
||||
)
|
||||
|
||||
documents_indexed = 0
|
||||
documents_updated = 0
|
||||
documents_skipped = 0
|
||||
failed_urls = []
|
||||
|
||||
for idx, url in enumerate(urls, 1):
|
||||
try:
|
||||
logger.info(f"Processing URL {idx}/{len(urls)}: {url}")
|
||||
|
||||
await task_logger.log_task_progress(
|
||||
log_entry,
|
||||
f"Crawling URL {idx}/{len(urls)}: {url}",
|
||||
{
|
||||
"stage": "crawling_url",
|
||||
"url_index": idx,
|
||||
"url": url,
|
||||
},
|
||||
)
|
||||
|
||||
# Crawl the URL
|
||||
crawl_result, error = await crawler.crawl_url(url)
|
||||
|
||||
if error or not crawl_result:
|
||||
logger.warning(f"Failed to crawl URL {url}: {error}")
|
||||
failed_urls.append((url, error or "Unknown error"))
|
||||
continue
|
||||
|
||||
# Extract content and metadata
|
||||
content = crawl_result.get("content", "")
|
||||
metadata = crawl_result.get("metadata", {})
|
||||
crawler_type = crawl_result.get("crawler_type", "unknown")
|
||||
|
||||
if not content.strip():
|
||||
logger.warning(f"Skipping URL with no content: {url}")
|
||||
failed_urls.append((url, "No content extracted"))
|
||||
documents_skipped += 1
|
||||
continue
|
||||
|
||||
# Format content as structured document
|
||||
structured_document = crawler.format_to_structured_document(
|
||||
crawl_result
|
||||
)
|
||||
|
||||
# Generate unique identifier hash for this URL
|
||||
unique_identifier_hash = generate_unique_identifier_hash(
|
||||
DocumentType.CRAWLED_URL, url, search_space_id
|
||||
)
|
||||
|
||||
# Generate content hash
|
||||
# TODO: To fix this by not including dynamic content like date, time, etc.
|
||||
content_hash = generate_content_hash(
|
||||
structured_document, search_space_id
|
||||
)
|
||||
|
||||
# Check if document with this unique identifier already exists
|
||||
existing_document = await check_document_by_unique_identifier(
|
||||
session, unique_identifier_hash
|
||||
)
|
||||
|
||||
# Extract useful metadata
|
||||
title = metadata.get("title", url)
|
||||
description = metadata.get("description", "")
|
||||
language = metadata.get("language", "")
|
||||
|
||||
if existing_document:
|
||||
# Document exists - check if content has changed
|
||||
if existing_document.content_hash == content_hash:
|
||||
logger.info(f"Document for URL {url} unchanged. Skipping.")
|
||||
documents_skipped += 1
|
||||
continue
|
||||
else:
|
||||
# Content has changed - update the existing document
|
||||
logger.info(
|
||||
f"Content changed for URL {url}. Updating document."
|
||||
)
|
||||
|
||||
# Generate summary with metadata
|
||||
user_llm = await get_user_long_context_llm(
|
||||
session, user_id, search_space_id
|
||||
)
|
||||
|
||||
if user_llm:
|
||||
document_metadata = {
|
||||
"url": url,
|
||||
"title": title,
|
||||
"description": description,
|
||||
"language": language,
|
||||
"document_type": "Crawled URL",
|
||||
"crawler_type": crawler_type,
|
||||
}
|
||||
(
|
||||
summary_content,
|
||||
summary_embedding,
|
||||
) = await generate_document_summary(
|
||||
structured_document, user_llm, document_metadata
|
||||
)
|
||||
else:
|
||||
# Fallback to simple summary if no LLM configured
|
||||
summary_content = f"Crawled URL: {title}\n\n"
|
||||
summary_content += f"URL: {url}\n"
|
||||
if description:
|
||||
summary_content += f"Description: {description}\n"
|
||||
if language:
|
||||
summary_content += f"Language: {language}\n"
|
||||
summary_content += f"Crawler: {crawler_type}\n\n"
|
||||
|
||||
# Add content preview
|
||||
content_preview = content[:1000]
|
||||
if len(content) > 1000:
|
||||
content_preview += "..."
|
||||
summary_content += f"Content Preview:\n{content_preview}\n"
|
||||
|
||||
summary_embedding = config.embedding_model_instance.embed(
|
||||
summary_content
|
||||
)
|
||||
|
||||
# Process chunks
|
||||
chunks = await create_document_chunks(content)
|
||||
|
||||
# Update existing document
|
||||
existing_document.title = title
|
||||
existing_document.content = summary_content
|
||||
existing_document.content_hash = content_hash
|
||||
existing_document.embedding = summary_embedding
|
||||
existing_document.document_metadata = {
|
||||
**metadata,
|
||||
"crawler_type": crawler_type,
|
||||
"last_crawled_at": datetime.now().strftime(
|
||||
"%Y-%m-%d %H:%M:%S"
|
||||
),
|
||||
}
|
||||
existing_document.chunks = chunks
|
||||
|
||||
documents_updated += 1
|
||||
logger.info(f"Successfully updated URL {url}")
|
||||
continue
|
||||
|
||||
# Document doesn't exist - create new one
|
||||
# Generate summary with metadata
|
||||
user_llm = await get_user_long_context_llm(
|
||||
session, user_id, search_space_id
|
||||
)
|
||||
|
||||
if user_llm:
|
||||
document_metadata = {
|
||||
"url": url,
|
||||
"title": title,
|
||||
"description": description,
|
||||
"language": language,
|
||||
"document_type": "Crawled URL",
|
||||
"crawler_type": crawler_type,
|
||||
}
|
||||
(
|
||||
summary_content,
|
||||
summary_embedding,
|
||||
) = await generate_document_summary(
|
||||
structured_document, user_llm, document_metadata
|
||||
)
|
||||
else:
|
||||
# Fallback to simple summary if no LLM configured
|
||||
summary_content = f"Crawled URL: {title}\n\n"
|
||||
summary_content += f"URL: {url}\n"
|
||||
if description:
|
||||
summary_content += f"Description: {description}\n"
|
||||
if language:
|
||||
summary_content += f"Language: {language}\n"
|
||||
summary_content += f"Crawler: {crawler_type}\n\n"
|
||||
|
||||
# Add content preview
|
||||
content_preview = content[:1000]
|
||||
if len(content) > 1000:
|
||||
content_preview += "..."
|
||||
summary_content += f"Content Preview:\n{content_preview}\n"
|
||||
|
||||
summary_embedding = config.embedding_model_instance.embed(
|
||||
summary_content
|
||||
)
|
||||
|
||||
chunks = await create_document_chunks(content)
|
||||
|
||||
document = Document(
|
||||
search_space_id=search_space_id,
|
||||
title=title,
|
||||
document_type=DocumentType.CRAWLED_URL,
|
||||
document_metadata={
|
||||
**metadata,
|
||||
"crawler_type": crawler_type,
|
||||
"indexed_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
|
||||
},
|
||||
content=summary_content,
|
||||
content_hash=content_hash,
|
||||
unique_identifier_hash=unique_identifier_hash,
|
||||
embedding=summary_embedding,
|
||||
chunks=chunks,
|
||||
)
|
||||
|
||||
session.add(document)
|
||||
documents_indexed += 1
|
||||
logger.info(f"Successfully indexed new URL {url}")
|
||||
|
||||
# Batch commit every 10 documents
|
||||
if (documents_indexed + documents_updated) % 10 == 0:
|
||||
logger.info(
|
||||
f"Committing batch: {documents_indexed + documents_updated} URLs processed so far"
|
||||
)
|
||||
await session.commit()
|
||||
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f"Error processing URL {url}: {e!s}",
|
||||
exc_info=True,
|
||||
)
|
||||
failed_urls.append((url, str(e)))
|
||||
continue
|
||||
|
||||
total_processed = documents_indexed + documents_updated
|
||||
|
||||
if total_processed > 0:
|
||||
await update_connector_last_indexed(session, connector, update_last_indexed)
|
||||
|
||||
# Final commit for any remaining documents not yet committed in batches
|
||||
logger.info(
|
||||
f"Final commit: Total {documents_indexed} new, {documents_updated} updated URLs processed"
|
||||
)
|
||||
await session.commit()
|
||||
|
||||
# Build result message
|
||||
result_message = None
|
||||
if failed_urls:
|
||||
failed_summary = "; ".join(
|
||||
[f"{url}: {error}" for url, error in failed_urls[:5]]
|
||||
)
|
||||
if len(failed_urls) > 5:
|
||||
failed_summary += f" (and {len(failed_urls) - 5} more)"
|
||||
result_message = (
|
||||
f"Completed with {len(failed_urls)} failures: {failed_summary}"
|
||||
)
|
||||
|
||||
await task_logger.log_task_success(
|
||||
log_entry,
|
||||
f"Successfully completed crawled web page indexing for connector {connector_id}",
|
||||
{
|
||||
"urls_processed": total_processed,
|
||||
"documents_indexed": documents_indexed,
|
||||
"documents_updated": documents_updated,
|
||||
"documents_skipped": documents_skipped,
|
||||
"failed_urls_count": len(failed_urls),
|
||||
},
|
||||
)
|
||||
|
||||
logger.info(
|
||||
f"Web page indexing completed: {documents_indexed} new, "
|
||||
f"{documents_updated} updated, {documents_skipped} skipped, "
|
||||
f"{len(failed_urls)} failed"
|
||||
)
|
||||
return total_processed, result_message
|
||||
|
||||
except SQLAlchemyError as db_error:
|
||||
await session.rollback()
|
||||
await task_logger.log_task_failure(
|
||||
log_entry,
|
||||
f"Database error during web page indexing for connector {connector_id}",
|
||||
str(db_error),
|
||||
{"error_type": "SQLAlchemyError"},
|
||||
)
|
||||
logger.error(f"Database error: {db_error!s}", exc_info=True)
|
||||
return 0, f"Database error: {db_error!s}"
|
||||
except Exception as e:
|
||||
await session.rollback()
|
||||
await task_logger.log_task_failure(
|
||||
log_entry,
|
||||
f"Failed to index web page URLs for connector {connector_id}",
|
||||
str(e),
|
||||
{"error_type": type(e).__name__},
|
||||
)
|
||||
logger.error(f"Failed to index web page URLs: {e!s}", exc_info=True)
|
||||
return 0, f"Failed to index web page URLs: {e!s}"
|
||||
|
||||
|
||||
async def get_crawled_url_documents(
    session: AsyncSession,
    search_space_id: int,
    connector_id: int | None = None,
) -> list[Document]:
    """
    Fetch every crawled-URL document stored in a search space.

    Args:
        session: Database session
        search_space_id: ID of the search space
        connector_id: Optional connector ID to filter by (currently a no-op;
            the Document model has no connector reference to filter on)

    Returns:
        List of Document objects
    """
    from sqlalchemy import select

    stmt = select(Document).where(
        Document.search_space_id == search_space_id,
        Document.document_type == DocumentType.CRAWLED_URL,
    )

    if connector_id:
        # Connector-level filtering is not implemented yet - Document carries
        # no connector_id column in the current schema, so nothing to add here.
        pass

    rows = await session.execute(stmt)
    return list(rows.scalars().all())
|
||||
|
|
@ -6,7 +6,6 @@ and sources. Each processor is responsible for handling a specific type of docum
|
|||
processing task in the background.
|
||||
|
||||
Available processors:
|
||||
- URL crawler: Process web pages from URLs
|
||||
- Extension processor: Handle documents from browser extension
|
||||
- Markdown processor: Process markdown files
|
||||
- File processors: Handle files using different ETL services (Unstructured, LlamaCloud, Docling)
|
||||
|
|
@ -26,14 +25,11 @@ from .file_processors import (
|
|||
|
||||
# Markdown processor
|
||||
from .markdown_processor import add_received_markdown_file_document
|
||||
from .url_crawler import add_crawled_url_document
|
||||
|
||||
# YouTube processor
|
||||
from .youtube_processor import add_youtube_video_document
|
||||
|
||||
__all__ = [
|
||||
# URL processing
|
||||
"add_crawled_url_document",
|
||||
# Extension processing
|
||||
"add_extension_received_document",
|
||||
"add_received_file_document_using_docling",
|
||||
|
|
|
|||
|
|
@ -1,342 +0,0 @@
|
|||
"""
|
||||
URL crawler document processor.
|
||||
"""
|
||||
|
||||
import logging
|
||||
|
||||
import validators
|
||||
from firecrawl import AsyncFirecrawlApp
|
||||
from langchain_community.document_loaders import AsyncChromiumLoader
|
||||
from langchain_core.documents import Document as LangchainDocument
|
||||
from sqlalchemy.exc import SQLAlchemyError
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.config import config
|
||||
from app.db import Document, DocumentType
|
||||
from app.services.llm_service import get_user_long_context_llm
|
||||
from app.services.task_logging_service import TaskLoggingService
|
||||
from app.utils.document_converters import (
|
||||
create_document_chunks,
|
||||
generate_content_hash,
|
||||
generate_document_summary,
|
||||
generate_unique_identifier_hash,
|
||||
)
|
||||
|
||||
from .base import (
|
||||
check_document_by_unique_identifier,
|
||||
md,
|
||||
)
|
||||
|
||||
|
||||
async def add_crawled_url_document(
    session: AsyncSession, url: str, search_space_id: int, user_id: str
) -> Document | None:
    """
    Process and store a document from a crawled URL.

    Crawls the URL (Firecrawl when an API key is configured, otherwise a
    headless Chromium loader), builds a structured document string, dedupes
    by a URL-based unique-identifier hash, generates an LLM summary plus
    embedding, and creates or updates the corresponding Document row.

    Args:
        session: Database session
        url: URL to crawl
        search_space_id: ID of the search space
        user_id: ID of the user

    Returns:
        The created/updated Document, or the unchanged existing Document when
        the content hash matches (duplicate detected).

    Raises:
        ValueError: If the URL is invalid or Firecrawl fails to scrape it.
        RuntimeError: If no long-context LLM is configured, or crawling fails.
        SQLAlchemyError: On database errors (transaction is rolled back).
    """
    task_logger = TaskLoggingService(session, search_space_id)

    # Log task start
    log_entry = await task_logger.log_task_start(
        task_name="crawl_url_document",
        source="background_task",
        message=f"Starting URL crawling process for: {url}",
        metadata={"url": url, "user_id": str(user_id)},
    )

    try:
        # URL validation step
        await task_logger.log_task_progress(
            log_entry, f"Validating URL: {url}", {"stage": "validation"}
        )

        if not validators.url(url):
            raise ValueError(f"Url {url} is not a valid URL address")

        # Set up crawler - Firecrawl is preferred whenever an API key exists;
        # the Chromium loader is the keyless fallback.
        await task_logger.log_task_progress(
            log_entry,
            f"Setting up crawler for URL: {url}",
            {
                "stage": "crawler_setup",
                "firecrawl_available": bool(config.FIRECRAWL_API_KEY),
            },
        )

        use_firecrawl = bool(config.FIRECRAWL_API_KEY)

        if use_firecrawl:
            # Use Firecrawl SDK directly
            firecrawl_app = AsyncFirecrawlApp(api_key=config.FIRECRAWL_API_KEY)
        else:
            crawl_loader = AsyncChromiumLoader(urls=[url], headless=True)

        # Perform crawling
        await task_logger.log_task_progress(
            log_entry,
            f"Crawling URL content: {url}",
            {
                "stage": "crawling",
                "crawler_type": "AsyncFirecrawlApp"
                if use_firecrawl
                else "AsyncChromiumLoader",
            },
        )

        if use_firecrawl:
            # Use async Firecrawl SDK with v1 API - properly awaited
            scrape_result = await firecrawl_app.scrape_url(
                url=url, formats=["markdown"]
            )

            # scrape_result is a Pydantic ScrapeResponse object
            # Access attributes directly
            if scrape_result and scrape_result.success:
                # Extract markdown content
                markdown_content = scrape_result.markdown or ""

                # Extract metadata - this is a DICT
                metadata = scrape_result.metadata if scrape_result.metadata else {}

                # Convert to LangChain Document format so both crawler paths
                # produce the same shape downstream.
                url_crawled = [
                    LangchainDocument(
                        page_content=markdown_content,
                        metadata={
                            "source": url,
                            "title": metadata.get("title", url),
                            "description": metadata.get("description", ""),
                            "language": metadata.get("language", ""),
                            "sourceURL": metadata.get("sourceURL", url),
                            **metadata,  # Include all other metadata fields
                        },
                    )
                ]
                content_in_markdown = url_crawled[0].page_content
            else:
                error_msg = (
                    scrape_result.error
                    if scrape_result and hasattr(scrape_result, "error")
                    else "Unknown error"
                )
                raise ValueError(f"Firecrawl failed to scrape URL: {error_msg}")
        else:
            # Use AsyncChromiumLoader as fallback; `md` converts the raw HTML
            # to markdown so both paths yield markdown content.
            url_crawled = await crawl_loader.aload()
            content_in_markdown = md.transform_documents(url_crawled)[0].page_content

        # Format document
        await task_logger.log_task_progress(
            log_entry,
            f"Processing crawled content from: {url}",
            {"stage": "content_processing", "content_length": len(content_in_markdown)},
        )

        # Format document metadata in a more maintainable way
        metadata_sections = [
            (
                "METADATA",
                [
                    f"{key.upper()}: {value}"
                    for key, value in url_crawled[0].metadata.items()
                ],
            ),
            (
                "CONTENT",
                ["FORMAT: markdown", "TEXT_START", content_in_markdown, "TEXT_END"],
            ),
        ]

        # Build the document string more efficiently
        document_parts = []
        document_parts.append("<DOCUMENT>")

        for section_title, section_content in metadata_sections:
            document_parts.append(f"<{section_title}>")
            document_parts.extend(section_content)
            document_parts.append(f"</{section_title}>")

        document_parts.append("</DOCUMENT>")
        combined_document_string = "\n".join(document_parts)

        # Generate unique identifier hash for this URL (identity key: same
        # URL + search space always maps to the same document row)
        unique_identifier_hash = generate_unique_identifier_hash(
            DocumentType.CRAWLED_URL, url, search_space_id
        )

        # Generate content hash (change key: used to skip re-indexing when the
        # page content is unchanged)
        content_hash = generate_content_hash(combined_document_string, search_space_id)

        # Check if document with this unique identifier already exists
        await task_logger.log_task_progress(
            log_entry,
            f"Checking for existing URL: {url}",
            {"stage": "duplicate_check", "url": url},
        )

        existing_document = await check_document_by_unique_identifier(
            session, unique_identifier_hash
        )

        if existing_document:
            # Document exists - check if content has changed
            if existing_document.content_hash == content_hash:
                await task_logger.log_task_success(
                    log_entry,
                    f"URL document unchanged: {url}",
                    {
                        "duplicate_detected": True,
                        "existing_document_id": existing_document.id,
                    },
                )
                logging.info(f"Document for URL {url} unchanged. Skipping.")
                return existing_document
            else:
                # Content has changed - update the existing document.
                # Note: we fall through to the shared summary/chunk pipeline
                # below; `existing_document` being set selects the update path.
                logging.info(f"Content changed for URL {url}. Updating document.")
                await task_logger.log_task_progress(
                    log_entry,
                    f"Updating URL document: {url}",
                    {"stage": "document_update", "url": url},
                )

        # Get LLM for summary generation (needed for both create and update)
        await task_logger.log_task_progress(
            log_entry,
            f"Preparing for summary generation: {url}",
            {"stage": "llm_setup"},
        )

        # Get user's long context LLM
        user_llm = await get_user_long_context_llm(session, user_id, search_space_id)
        if not user_llm:
            raise RuntimeError(
                f"No long context LLM configured for user {user_id} in search space {search_space_id}"
            )

        # Generate summary
        await task_logger.log_task_progress(
            log_entry,
            f"Generating summary for URL content: {url}",
            {"stage": "summary_generation"},
        )

        # Generate summary with metadata
        document_metadata = {
            "url": url,
            "title": url_crawled[0].metadata.get("title", url),
            "document_type": "Crawled URL Document",
            "crawler_type": "FirecrawlApp" if use_firecrawl else "AsyncChromiumLoader",
        }
        summary_content, summary_embedding = await generate_document_summary(
            combined_document_string, user_llm, document_metadata
        )

        # Process chunks
        await task_logger.log_task_progress(
            log_entry,
            f"Processing content chunks for URL: {url}",
            {"stage": "chunk_processing"},
        )

        # Local import - presumably to avoid an import cycle or defer a heavy
        # dependency; confirm before hoisting to module level.
        from app.utils.blocknote_converter import convert_markdown_to_blocknote

        # Convert markdown to BlockNote JSON (best-effort: a failure only
        # makes the document non-editable, it does not abort indexing)
        blocknote_json = await convert_markdown_to_blocknote(combined_document_string)
        if not blocknote_json:
            logging.warning(
                f"Failed to convert crawled URL '{url}' to BlockNote JSON, "
                "document will not be editable"
            )

        # Chunks are built from the raw markdown, not the wrapped
        # <DOCUMENT> string used for the summary/hash.
        chunks = await create_document_chunks(content_in_markdown)

        # Update or create document
        if existing_document:
            # Update existing document
            await task_logger.log_task_progress(
                log_entry,
                f"Updating document in database for URL: {url}",
                {"stage": "document_update", "chunks_count": len(chunks)},
            )

            existing_document.title = url_crawled[0].metadata.get(
                "title", url_crawled[0].metadata.get("source", url)
            )
            existing_document.content = summary_content
            existing_document.content_hash = content_hash
            existing_document.embedding = summary_embedding
            existing_document.document_metadata = url_crawled[0].metadata
            existing_document.chunks = chunks
            existing_document.blocknote_document = blocknote_json

            document = existing_document
        else:
            # Create new document
            await task_logger.log_task_progress(
                log_entry,
                f"Creating document in database for URL: {url}",
                {"stage": "document_creation", "chunks_count": len(chunks)},
            )

            document = Document(
                search_space_id=search_space_id,
                title=url_crawled[0].metadata.get(
                    "title", url_crawled[0].metadata.get("source", url)
                ),
                document_type=DocumentType.CRAWLED_URL,
                document_metadata=url_crawled[0].metadata,
                content=summary_content,
                embedding=summary_embedding,
                chunks=chunks,
                content_hash=content_hash,
                unique_identifier_hash=unique_identifier_hash,
                blocknote_document=blocknote_json,
            )

            session.add(document)
        await session.commit()
        await session.refresh(document)

        # Log success
        await task_logger.log_task_success(
            log_entry,
            f"Successfully crawled and processed URL: {url}",
            {
                "document_id": document.id,
                "title": document.title,
                "content_hash": content_hash,
                "chunks_count": len(chunks),
                "summary_length": len(summary_content),
            },
        )

        return document

    except SQLAlchemyError as db_error:
        await session.rollback()
        await task_logger.log_task_failure(
            log_entry,
            f"Database error while processing URL: {url}",
            str(db_error),
            {"error_type": "SQLAlchemyError"},
        )
        raise db_error
    except Exception as e:
        await session.rollback()
        await task_logger.log_task_failure(
            log_entry,
            f"Failed to crawl URL: {url}",
            str(e),
            {"error_type": type(e).__name__},
        )
        raise RuntimeError(f"Failed to crawl URL: {e!s}") from e
|
||||
|
|
@ -1,19 +0,0 @@
|
|||
from fastapi import HTTPException
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy.future import select
|
||||
|
||||
from app.db import User
|
||||
|
||||
|
||||
# Helper function to check user ownership
|
||||
async def check_ownership(session: AsyncSession, model, item_id: int, user: User):
    """
    Load an item by id and verify it belongs to the given user.

    Raises HTTP 404 both when the row is missing and when it is owned by
    someone else, so callers cannot distinguish the two cases.
    """
    lookup = await session.execute(
        select(model).filter(model.id == item_id, model.user_id == user.id)
    )
    owned_item = lookup.scalars().first()
    if not owned_item:
        raise HTTPException(
            status_code=404,
            detail="Item not found or you don't have permission to access it",
        )
    return owned_item
|
||||
|
|
@ -31,6 +31,7 @@ CONNECTOR_TASK_MAP = {
|
|||
SearchSourceConnectorType.DISCORD_CONNECTOR: "index_discord_messages",
|
||||
SearchSourceConnectorType.LUMA_CONNECTOR: "index_luma_events",
|
||||
SearchSourceConnectorType.ELASTICSEARCH_CONNECTOR: "index_elasticsearch_documents",
|
||||
SearchSourceConnectorType.WEBCRAWLER_CONNECTOR: "index_crawled_urls",
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -69,6 +70,7 @@ def create_periodic_schedule(
|
|||
index_airtable_records_task,
|
||||
index_clickup_tasks_task,
|
||||
index_confluence_pages_task,
|
||||
index_crawled_urls_task,
|
||||
index_discord_messages_task,
|
||||
index_elasticsearch_documents_task,
|
||||
index_github_repos_task,
|
||||
|
|
@ -96,6 +98,7 @@ def create_periodic_schedule(
|
|||
SearchSourceConnectorType.DISCORD_CONNECTOR: index_discord_messages_task,
|
||||
SearchSourceConnectorType.LUMA_CONNECTOR: index_luma_events_task,
|
||||
SearchSourceConnectorType.ELASTICSEARCH_CONNECTOR: index_elasticsearch_documents_task,
|
||||
SearchSourceConnectorType.WEBCRAWLER_CONNECTOR: index_crawled_urls_task,
|
||||
}
|
||||
|
||||
# Trigger the first run immediately
|
||||
|
|
|
|||
274
surfsense_backend/app/utils/rbac.py
Normal file
274
surfsense_backend/app/utils/rbac.py
Normal file
|
|
@ -0,0 +1,274 @@
|
|||
"""
|
||||
RBAC (Role-Based Access Control) utility functions.
|
||||
Provides helpers for checking user permissions in search spaces.
|
||||
"""
|
||||
|
||||
import secrets
|
||||
from uuid import UUID
|
||||
|
||||
from fastapi import HTTPException
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy.future import select
|
||||
from sqlalchemy.orm import selectinload
|
||||
|
||||
from app.db import (
|
||||
Permission,
|
||||
SearchSpace,
|
||||
SearchSpaceMembership,
|
||||
SearchSpaceRole,
|
||||
User,
|
||||
has_permission,
|
||||
)
|
||||
|
||||
|
||||
async def get_user_membership(
    session: AsyncSession,
    user_id: UUID,
    search_space_id: int,
) -> SearchSpaceMembership | None:
    """
    Fetch a user's membership record for a search space.

    The membership's role relationship is eagerly loaded so callers can
    read role permissions without triggering a lazy load.

    Args:
        session: Database session
        user_id: User UUID
        search_space_id: Search space ID

    Returns:
        SearchSpaceMembership if found, None otherwise
    """
    stmt = (
        select(SearchSpaceMembership)
        .options(selectinload(SearchSpaceMembership.role))
        .where(
            SearchSpaceMembership.user_id == user_id,
            SearchSpaceMembership.search_space_id == search_space_id,
        )
    )
    rows = await session.execute(stmt)
    return rows.scalars().first()
|
||||
|
||||
|
||||
async def get_user_permissions(
    session: AsyncSession,
    user_id: UUID,
    search_space_id: int,
) -> list[str]:
    """
    Resolve the permission strings a user holds in a search space.

    Owners implicitly receive FULL_ACCESS; other members get whatever their
    assigned role grants. Non-members (and members without a role) get none.

    Args:
        session: Database session
        user_id: User UUID
        search_space_id: Search space ID

    Returns:
        List of permission strings
    """
    membership = await get_user_membership(session, user_id, search_space_id)
    if not membership:
        return []

    # Owners always have full access
    if membership.is_owner:
        return [Permission.FULL_ACCESS.value]

    # Otherwise permissions come from the assigned role, if any
    role = membership.role
    return (role.permissions or []) if role else []
|
||||
|
||||
|
||||
async def check_permission(
    session: AsyncSession,
    user: User,
    search_space_id: int,
    required_permission: str,
    error_message: str = "You don't have permission to perform this action",
) -> SearchSpaceMembership:
    """
    Assert that *user* holds *required_permission* in a search space.

    Args:
        session: Database session
        user: User object
        search_space_id: Search space ID
        required_permission: Permission string to check
        error_message: Custom error message for permission denied

    Returns:
        SearchSpaceMembership if permission granted

    Raises:
        HTTPException: 403 when the user is not a member or lacks the permission
    """
    membership = await get_user_membership(session, user.id, search_space_id)
    if not membership:
        raise HTTPException(
            status_code=403,
            detail="You don't have access to this search space",
        )

    # Owners bypass the role lookup entirely; everyone else inherits
    # permissions from their assigned role (or nothing without a role).
    if membership.is_owner:
        granted = [Permission.FULL_ACCESS.value]
    else:
        granted = (membership.role.permissions or []) if membership.role else []

    if not has_permission(granted, required_permission):
        raise HTTPException(status_code=403, detail=error_message)

    return membership
|
||||
|
||||
|
||||
async def check_search_space_access(
    session: AsyncSession,
    user: User,
    search_space_id: int,
) -> SearchSpaceMembership:
    """
    Ensure the user is a member of the search space (basic access control).

    Args:
        session: Database session
        user: User object
        search_space_id: Search space ID

    Returns:
        SearchSpaceMembership if user has access

    Raises:
        HTTPException: 403 if the user is not a member
    """
    membership = await get_user_membership(session, user.id, search_space_id)
    if membership is None:
        raise HTTPException(
            status_code=403,
            detail="You don't have access to this search space",
        )
    return membership
|
||||
|
||||
|
||||
async def is_search_space_owner(
    session: AsyncSession,
    user_id: UUID,
    search_space_id: int,
) -> bool:
    """
    Report whether the user owns the given search space.

    Args:
        session: Database session
        user_id: User UUID
        search_space_id: Search space ID

    Returns:
        True if user is the owner, False otherwise
    """
    membership = await get_user_membership(session, user_id, search_space_id)
    if membership is None:
        return False
    return bool(membership.is_owner)
|
||||
|
||||
|
||||
async def get_search_space_with_access_check(
    session: AsyncSession,
    user: User,
    search_space_id: int,
    required_permission: str | None = None,
) -> tuple[SearchSpace, SearchSpaceMembership]:
    """
    Load a search space after verifying access (and optionally a permission).

    Args:
        session: Database session
        user: User object
        search_space_id: Search space ID
        required_permission: Optional permission to check

    Returns:
        Tuple of (SearchSpace, SearchSpaceMembership)

    Raises:
        HTTPException: 404 if the search space is missing; 403 if the user
            lacks access or the required permission
    """
    lookup = await session.execute(
        select(SearchSpace).filter(SearchSpace.id == search_space_id)
    )
    search_space = lookup.scalars().first()
    if search_space is None:
        raise HTTPException(status_code=404, detail="Search space not found")

    # With a permission requested, enforce it; otherwise membership suffices.
    if required_permission:
        membership = await check_permission(
            session, user, search_space_id, required_permission
        )
    else:
        membership = await check_search_space_access(session, user, search_space_id)

    return search_space, membership
|
||||
|
||||
|
||||
def generate_invite_code() -> str:
    """
    Generate a unique invite code for search space invites.

    Returns:
        A 32-character URL-safe invite code (24 random bytes, base64url-encoded)
    """
    entropy_bytes = 24  # 24 bytes -> 32 characters after base64url encoding
    return secrets.token_urlsafe(entropy_bytes)
|
||||
|
||||
|
||||
async def get_default_role(
    session: AsyncSession,
    search_space_id: int,
) -> SearchSpaceRole | None:
    """
    Get the default role for a search space (used when accepting invites
    without a specific role).

    Args:
        session: Database session
        search_space_id: Search space ID

    Returns:
        Default SearchSpaceRole or None
    """
    # Use SQLAlchemy's `.is_(True)` for the boolean column rather than
    # `== True`, which needed a `# noqa: E712` lint suppression; the
    # generated filter is equivalent for a boolean column.
    result = await session.execute(
        select(SearchSpaceRole).filter(
            SearchSpaceRole.search_space_id == search_space_id,
            SearchSpaceRole.is_default.is_(True),
        )
    )
    return result.scalars().first()
|
||||
|
||||
|
||||
async def get_owner_role(
    session: AsyncSession,
    search_space_id: int,
) -> SearchSpaceRole | None:
    """
    Get the Owner role for a search space.

    Args:
        session: Database session
        search_space_id: Search space ID

    Returns:
        Owner SearchSpaceRole or None
    """
    stmt = select(SearchSpaceRole).where(
        SearchSpaceRole.search_space_id == search_space_id,
        SearchSpaceRole.name == "Owner",
    )
    found = await session.execute(stmt)
    return found.scalars().first()
|
||||
|
|
@ -469,6 +469,22 @@ def validate_connector_config(
|
|||
if not isinstance(value, list) or not value:
|
||||
raise ValueError(f"{field_name} must be a non-empty list of strings")
|
||||
|
||||
def validate_firecrawl_api_key_format() -> None:
    """Validate Firecrawl API key format if provided (keys start with 'fc-')."""
    raw_key = config.get("FIRECRAWL_API_KEY", "")
    trimmed = raw_key.strip() if raw_key else ""
    # An absent/blank key is allowed (the connector works without one);
    # only a present key with the wrong prefix is rejected.
    if trimmed and not trimmed.startswith("fc-"):
        raise ValueError(
            "Firecrawl API key should start with 'fc-'. Please verify your API key."
        )
|
||||
|
||||
def validate_initial_urls() -> None:
    """Validate that every non-blank line of INITIAL_URLS is a well-formed URL."""
    raw_value = config.get("INITIAL_URLS", "")
    if not (raw_value and raw_value.strip()):
        return
    # URLs are newline-separated; blank lines are ignored, the first
    # malformed entry aborts validation.
    for line in raw_value.split("\n"):
        candidate = line.strip()
        if candidate and not validators.url(candidate):
            raise ValueError(f"Invalid URL format in INITIAL_URLS: {candidate}")
|
||||
|
||||
# Lookup table for connector validation rules
|
||||
connector_rules = {
|
||||
"SERPER_API": {"required": ["SERPER_API_KEY"], "validators": {}},
|
||||
|
|
@ -550,6 +566,14 @@ def validate_connector_config(
|
|||
# "validators": {}
|
||||
# },
|
||||
"LUMA_CONNECTOR": {"required": ["LUMA_API_KEY"], "validators": {}},
|
||||
"WEBCRAWLER_CONNECTOR": {
|
||||
"required": [], # No required fields - API key is optional
|
||||
"optional": ["FIRECRAWL_API_KEY", "INITIAL_URLS"],
|
||||
"validators": {
|
||||
"FIRECRAWL_API_KEY": lambda: validate_firecrawl_api_key_format(),
|
||||
"INITIAL_URLS": lambda: validate_initial_urls(),
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
rules = connector_rules.get(connector_type_str)
|
||||
|
|
|
|||
|
|
@ -11,7 +11,6 @@ dependencies = [
|
|||
"docling>=2.15.0",
|
||||
"fastapi>=0.115.8",
|
||||
"fastapi-users[oauth,sqlalchemy]>=14.0.1",
|
||||
"firecrawl-py>=1.12.0",
|
||||
"github3.py==4.0.1",
|
||||
"google-api-python-client>=2.156.0",
|
||||
"google-auth-oauthlib>=1.2.1",
|
||||
|
|
@ -49,6 +48,7 @@ dependencies = [
|
|||
"flower>=2.0.1",
|
||||
"redis>=5.2.1",
|
||||
"chonkie[all]>=1.4.0",
|
||||
"firecrawl-py>=4.9.0",
|
||||
]
|
||||
|
||||
[dependency-groups]
|
||||
|
|
|
|||
9
surfsense_backend/uv.lock
generated
9
surfsense_backend/uv.lock
generated
|
|
@ -1541,19 +1541,20 @@ wheels = [
|
|||
|
||||
[[package]]
|
||||
name = "firecrawl-py"
|
||||
version = "2.8.0"
|
||||
version = "4.9.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "aiohttp" },
|
||||
{ name = "httpx" },
|
||||
{ name = "nest-asyncio" },
|
||||
{ name = "pydantic" },
|
||||
{ name = "python-dotenv" },
|
||||
{ name = "requests" },
|
||||
{ name = "websockets" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/11/83/64127a0faafb027c2870c3919aae13fd6f8f8066d000bea93c880ab9772a/firecrawl_py-2.8.0.tar.gz", hash = "sha256:657795b6ddd63f0bd38b38bf0571187e0a66becda23d97c032801895257403c9", size = 37941 }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/a5/2e/e4112ebd229bc03202584f5ad2ece81c26cb2a7bad0cd4773b8705d996e9/firecrawl_py-4.9.0.tar.gz", hash = "sha256:8e5740ed923c89e6066dfd63b0449f049bbd274652dfac3d735c9ae0572c4b0c", size = 153395 }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/74/e6/e69bd2156856f2b1849244ca3b1d993676175b16acbf704ad85580ebaa3c/firecrawl_py-2.8.0-py3-none-any.whl", hash = "sha256:f2e148086aa1ca42f603a56009577b4f66a2c23893eaa71f7c9c0082b4fdcf60", size = 173118 },
|
||||
{ url = "https://files.pythonhosted.org/packages/3a/cf/99848233303ca9c9d84cf22de08adc1051e8b6df672aeed14f32272df86b/firecrawl_py-4.9.0-py3-none-any.whl", hash = "sha256:adb027ed8bdda712201dc9727ead1a051dc3d114c2a0051de1f159c420703684", size = 190971 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -5926,7 +5927,7 @@ requires-dist = [
|
|||
{ name = "fastapi", specifier = ">=0.115.8" },
|
||||
{ name = "fastapi-users", extras = ["oauth", "sqlalchemy"], specifier = ">=14.0.1" },
|
||||
{ name = "faster-whisper", specifier = ">=1.1.0" },
|
||||
{ name = "firecrawl-py", specifier = ">=1.12.0" },
|
||||
{ name = "firecrawl-py", specifier = ">=4.9.0" },
|
||||
{ name = "flower", specifier = ">=2.0.1" },
|
||||
{ name = "github3-py", specifier = "==4.0.1" },
|
||||
{ name = "google-api-python-client", specifier = ">=2.156.0" },
|
||||
|
|
|
|||
|
|
@ -4,6 +4,15 @@
|
|||
"version": "0.0.8",
|
||||
"description": "Extension to collect Browsing History for SurfSense.",
|
||||
"author": "https://github.com/MODSetter",
|
||||
"engines": {
|
||||
"node": ">=18.0.0 <23.0.0",
|
||||
"pnpm": ">=8.0.0"
|
||||
},
|
||||
"pnpm": {
|
||||
"overrides": {
|
||||
"sharp": "^0.33.5"
|
||||
}
|
||||
},
|
||||
"scripts": {
|
||||
"dev": "plasmo dev",
|
||||
"build": "plasmo build",
|
||||
|
|
@ -24,13 +33,14 @@
|
|||
"dom-to-semantic-markdown": "^1.2.11",
|
||||
"linkedom": "0.1.34",
|
||||
"lucide-react": "^0.454.0",
|
||||
"plasmo": "0.89.4",
|
||||
"plasmo": "0.90.5",
|
||||
"postcss-loader": "^8.1.1",
|
||||
"radix-ui": "^1.0.1",
|
||||
"react": "18.2.0",
|
||||
"react-dom": "18.2.0",
|
||||
"react-hooks-global-state": "^2.1.0",
|
||||
"react-router-dom": "^6.26.1",
|
||||
"sharp": "^0.33.5",
|
||||
"tailwind-merge": "^2.5.4",
|
||||
"tailwindcss-animate": "^1.0.7"
|
||||
},
|
||||
|
|
|
|||
2031
surfsense_browser_extension/pnpm-lock.yaml
generated
2031
surfsense_browser_extension/pnpm-lock.yaml
generated
File diff suppressed because it is too large
Load diff
|
|
@ -18,6 +18,7 @@ import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/com
|
|||
import { Separator } from "@/components/ui/separator";
|
||||
import { SidebarInset, SidebarProvider, SidebarTrigger } from "@/components/ui/sidebar";
|
||||
import { useLLMPreferences } from "@/hooks/use-llm-configs";
|
||||
import { useUserAccess } from "@/hooks/use-rbac";
|
||||
import { cn } from "@/lib/utils";
|
||||
|
||||
export function DashboardClientLayout({
|
||||
|
|
@ -60,11 +61,15 @@ export function DashboardClientLayout({
|
|||
}, [activeChatId, isChatPannelOpen]);
|
||||
|
||||
const { loading, error, isOnboardingComplete } = useLLMPreferences(searchSpaceIdNum);
|
||||
const { access, loading: accessLoading } = useUserAccess(searchSpaceIdNum);
|
||||
const [hasCheckedOnboarding, setHasCheckedOnboarding] = useState(false);
|
||||
|
||||
// Skip onboarding check if we're already on the onboarding page
|
||||
const isOnboardingPage = pathname?.includes("/onboard");
|
||||
|
||||
// Only owners should see onboarding - invited members use existing config
|
||||
const isOwner = access?.is_owner ?? false;
|
||||
|
||||
// Translate navigation items
|
||||
const tNavMenu = useTranslations("nav_menu");
|
||||
const translatedNavMain = useMemo(() => {
|
||||
|
|
@ -102,11 +107,13 @@ export function DashboardClientLayout({
|
|||
return;
|
||||
}
|
||||
|
||||
// Only check once after preferences have loaded
|
||||
if (!loading && !hasCheckedOnboarding) {
|
||||
// Wait for both preferences and access data to load
|
||||
if (!loading && !accessLoading && !hasCheckedOnboarding) {
|
||||
const onboardingComplete = isOnboardingComplete();
|
||||
|
||||
if (!onboardingComplete) {
|
||||
// Only redirect to onboarding if user is the owner and onboarding is not complete
|
||||
// Invited members (non-owners) should skip onboarding and use existing config
|
||||
if (!onboardingComplete && isOwner) {
|
||||
router.push(`/dashboard/${searchSpaceId}/onboard`);
|
||||
}
|
||||
|
||||
|
|
@ -114,8 +121,10 @@ export function DashboardClientLayout({
|
|||
}
|
||||
}, [
|
||||
loading,
|
||||
accessLoading,
|
||||
isOnboardingComplete,
|
||||
isOnboardingPage,
|
||||
isOwner,
|
||||
router,
|
||||
searchSpaceId,
|
||||
hasCheckedOnboarding,
|
||||
|
|
@ -145,7 +154,7 @@ export function DashboardClientLayout({
|
|||
}, [chat_id, search_space_id]);
|
||||
|
||||
// Show loading screen while checking onboarding status (only on first load)
|
||||
if (!hasCheckedOnboarding && loading && !isOnboardingPage) {
|
||||
if (!hasCheckedOnboarding && (loading || accessLoading) && !isOnboardingPage) {
|
||||
return (
|
||||
<div className="flex flex-col items-center justify-center min-h-screen space-y-4">
|
||||
<Card className="w-[350px] bg-background/60 backdrop-blur-sm">
|
||||
|
|
|
|||
|
|
@ -18,7 +18,16 @@ import {
|
|||
CardHeader,
|
||||
CardTitle,
|
||||
} from "@/components/ui/card";
|
||||
import { Form } from "@/components/ui/form";
|
||||
import {
|
||||
Form,
|
||||
FormControl,
|
||||
FormDescription,
|
||||
FormField,
|
||||
FormItem,
|
||||
FormLabel,
|
||||
FormMessage,
|
||||
} from "@/components/ui/form";
|
||||
import { Textarea } from "@/components/ui/textarea";
|
||||
import { getConnectorIcon } from "@/contracts/enums/connectorIcons";
|
||||
import { useConnectorEditPage } from "@/hooks/use-connector-edit-page";
|
||||
// Import Utils, Types, Hook, and Components
|
||||
|
|
@ -282,6 +291,40 @@ export default function EditConnectorPage() {
|
|||
placeholder="Your Elasticsearch API Key"
|
||||
/>
|
||||
)}
|
||||
|
||||
{/* == Webcrawler == */}
|
||||
{connector.connector_type === "WEBCRAWLER_CONNECTOR" && (
|
||||
<div className="space-y-4">
|
||||
<EditSimpleTokenForm
|
||||
control={editForm.control}
|
||||
fieldName="FIRECRAWL_API_KEY"
|
||||
fieldLabel="Firecrawl API Key (Optional)"
|
||||
fieldDescription="Add a Firecrawl API key for enhanced crawling capabilities. If not provided, will use AsyncChromiumLoader as fallback."
|
||||
placeholder="fc-xxxxxxxxxxxxx"
|
||||
/>
|
||||
<FormField
|
||||
control={editForm.control}
|
||||
name="INITIAL_URLS"
|
||||
render={({ field }) => (
|
||||
<FormItem>
|
||||
<FormLabel>URLs to Crawl</FormLabel>
|
||||
<FormControl>
|
||||
<Textarea
|
||||
placeholder="https://example.com https://docs.example.com https://blog.example.com"
|
||||
className="min-h-[150px] font-mono text-sm"
|
||||
{...field}
|
||||
/>
|
||||
</FormControl>
|
||||
<FormDescription>
|
||||
Enter URLs to crawl (one per line). These URLs will be indexed when you
|
||||
trigger indexing.
|
||||
</FormDescription>
|
||||
<FormMessage />
|
||||
</FormItem>
|
||||
)}
|
||||
/>
|
||||
</div>
|
||||
)}
|
||||
</CardContent>
|
||||
<CardFooter className="border-t pt-6">
|
||||
<Button type="submit" disabled={isSaving} className="w-full sm:w-auto">
|
||||
|
|
|
|||
|
|
@ -55,6 +55,7 @@ const getConnectorTypeDisplay = (type: string): string => {
|
|||
AIRTABLE_CONNECTOR: "Airtable Connector",
|
||||
LUMA_CONNECTOR: "Luma Connector",
|
||||
ELASTICSEARCH_CONNECTOR: "Elasticsearch Connector",
|
||||
WEBCRAWLER_CONNECTOR: "Web Page Connector",
|
||||
// Add other connector types here as needed
|
||||
};
|
||||
return typeMap[type] || type;
|
||||
|
|
@ -75,6 +76,7 @@ const getApiKeyFieldName = (connectorType: string): string => {
|
|||
LINKUP_API: "LINKUP_API_KEY",
|
||||
LUMA_CONNECTOR: "LUMA_API_KEY",
|
||||
ELASTICSEARCH_CONNECTOR: "ELASTICSEARCH_API_KEY",
|
||||
WEBCRAWLER_CONNECTOR: "FIRECRAWL_API_KEY",
|
||||
};
|
||||
return fieldMap[connectorType] || "";
|
||||
};
|
||||
|
|
|
|||
|
|
@ -0,0 +1,331 @@
|
|||
"use client";
|
||||
|
||||
import { zodResolver } from "@hookform/resolvers/zod";
|
||||
import { ArrowLeft, Check, Globe, Loader2 } from "lucide-react";
|
||||
import { motion } from "motion/react";
|
||||
import Link from "next/link";
|
||||
import { useParams, useRouter } from "next/navigation";
|
||||
import { useEffect, useState } from "react";
|
||||
import { useForm } from "react-hook-form";
|
||||
import { toast } from "sonner";
|
||||
import * as z from "zod";
|
||||
import { Button } from "@/components/ui/button";
|
||||
import {
|
||||
Card,
|
||||
CardContent,
|
||||
CardDescription,
|
||||
CardFooter,
|
||||
CardHeader,
|
||||
CardTitle,
|
||||
} from "@/components/ui/card";
|
||||
import {
|
||||
Form,
|
||||
FormControl,
|
||||
FormDescription,
|
||||
FormField,
|
||||
FormItem,
|
||||
FormLabel,
|
||||
FormMessage,
|
||||
} from "@/components/ui/form";
|
||||
import { Input } from "@/components/ui/input";
|
||||
import { Textarea } from "@/components/ui/textarea";
|
||||
import { EnumConnectorName } from "@/contracts/enums/connector";
|
||||
import { getConnectorIcon } from "@/contracts/enums/connectorIcons";
|
||||
import {
|
||||
type SearchSourceConnector,
|
||||
useSearchSourceConnectors,
|
||||
} from "@/hooks/use-search-source-connectors";
|
||||
|
||||
// Define the form schema with Zod
|
||||
const webcrawlerConnectorFormSchema = z.object({
|
||||
name: z.string().min(3, {
|
||||
message: "Connector name must be at least 3 characters.",
|
||||
}),
|
||||
api_key: z.string().optional(),
|
||||
initial_urls: z.string().optional(),
|
||||
});
|
||||
|
||||
// Define the type for the form values
|
||||
type WebcrawlerConnectorFormValues = z.infer<typeof webcrawlerConnectorFormSchema>;
|
||||
|
||||
export default function WebcrawlerConnectorPage() {
|
||||
const router = useRouter();
|
||||
const params = useParams();
|
||||
const searchSpaceId = params.search_space_id as string;
|
||||
const [isSubmitting, setIsSubmitting] = useState(false);
|
||||
const [doesConnectorExist, setDoesConnectorExist] = useState(false);
|
||||
|
||||
const { fetchConnectors, createConnector } = useSearchSourceConnectors(
|
||||
true,
|
||||
parseInt(searchSpaceId)
|
||||
);
|
||||
|
||||
// Initialize the form
|
||||
const form = useForm<WebcrawlerConnectorFormValues>({
|
||||
resolver: zodResolver(webcrawlerConnectorFormSchema),
|
||||
defaultValues: {
|
||||
name: "Web Pages",
|
||||
api_key: "",
|
||||
initial_urls: "",
|
||||
},
|
||||
});
|
||||
|
||||
useEffect(() => {
|
||||
fetchConnectors(parseInt(searchSpaceId))
|
||||
.then((data) => {
|
||||
if (data && Array.isArray(data)) {
|
||||
const connector = data.find(
|
||||
(c: SearchSourceConnector) =>
|
||||
c.connector_type === EnumConnectorName.WEBCRAWLER_CONNECTOR
|
||||
);
|
||||
if (connector) {
|
||||
setDoesConnectorExist(true);
|
||||
}
|
||||
}
|
||||
})
|
||||
.catch((error) => {
|
||||
console.error("Error fetching connectors:", error);
|
||||
});
|
||||
}, [fetchConnectors, searchSpaceId]);
|
||||
|
||||
// Handle form submission
|
||||
const onSubmit = async (values: WebcrawlerConnectorFormValues) => {
|
||||
setIsSubmitting(true);
|
||||
try {
|
||||
const config: Record<string, string> = {};
|
||||
|
||||
// Only add API key to config if provided
|
||||
if (values.api_key && values.api_key.trim()) {
|
||||
config.FIRECRAWL_API_KEY = values.api_key;
|
||||
}
|
||||
|
||||
// Parse initial URLs if provided
|
||||
if (values.initial_urls && values.initial_urls.trim()) {
|
||||
config.INITIAL_URLS = values.initial_urls;
|
||||
}
|
||||
|
||||
await createConnector(
|
||||
{
|
||||
name: values.name,
|
||||
connector_type: EnumConnectorName.WEBCRAWLER_CONNECTOR,
|
||||
config: config,
|
||||
is_indexable: true,
|
||||
last_indexed_at: null,
|
||||
periodic_indexing_enabled: false,
|
||||
indexing_frequency_minutes: null,
|
||||
next_scheduled_at: null,
|
||||
},
|
||||
parseInt(searchSpaceId)
|
||||
);
|
||||
|
||||
toast.success("Webcrawler connector created successfully!");
|
||||
|
||||
// Navigate back to connectors page
|
||||
router.push(`/dashboard/${searchSpaceId}/connectors`);
|
||||
} catch (error) {
|
||||
console.error("Error creating connector:", error);
|
||||
toast.error(error instanceof Error ? error.message : "Failed to create connector");
|
||||
} finally {
|
||||
setIsSubmitting(false);
|
||||
}
|
||||
};
|
||||
|
||||
return (
|
||||
<div className="container mx-auto py-8 max-w-2xl">
|
||||
<motion.div
|
||||
initial={{ opacity: 0, y: 20 }}
|
||||
animate={{ opacity: 1, y: 0 }}
|
||||
transition={{ duration: 0.5 }}
|
||||
>
|
||||
{/* Header */}
|
||||
<div className="mb-8">
|
||||
<Link
|
||||
href={`/dashboard/${searchSpaceId}/connectors/add`}
|
||||
className="inline-flex items-center text-sm text-muted-foreground hover:text-foreground mb-4"
|
||||
>
|
||||
<ArrowLeft className="mr-2 h-4 w-4" />
|
||||
Back to connectors
|
||||
</Link>
|
||||
<div className="flex items-center gap-4">
|
||||
<div className="flex h-12 w-12 items-center justify-center rounded-lg">
|
||||
{getConnectorIcon(EnumConnectorName.WEBCRAWLER_CONNECTOR, "h-6 w-6")}
|
||||
</div>
|
||||
<div>
|
||||
<h1 className="text-3xl font-bold tracking-tight">Connect Web Pages</h1>
|
||||
<p className="text-muted-foreground">Crawl and index web pages for search.</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Connection Card */}
|
||||
{!doesConnectorExist ? (
|
||||
<Card>
|
||||
<CardHeader>
|
||||
<CardTitle>Set Up Web Page crawler</CardTitle>
|
||||
<CardDescription>
|
||||
Configure your web page crawler to index web pages. Optionally add a Firecrawl API
|
||||
key for enhanced crawling capabilities.
|
||||
</CardDescription>
|
||||
</CardHeader>
|
||||
<Form {...form}>
|
||||
<form onSubmit={form.handleSubmit(onSubmit)}>
|
||||
<CardContent className="space-y-4">
|
||||
<FormField
|
||||
control={form.control}
|
||||
name="name"
|
||||
render={({ field }) => (
|
||||
<FormItem>
|
||||
<FormLabel>Connector Name</FormLabel>
|
||||
<FormControl>
|
||||
<Input placeholder="My Web Crawler" {...field} />
|
||||
</FormControl>
|
||||
<FormDescription>
|
||||
A friendly name to identify this connector.
|
||||
</FormDescription>
|
||||
<FormMessage />
|
||||
</FormItem>
|
||||
)}
|
||||
/>
|
||||
|
||||
<FormField
|
||||
control={form.control}
|
||||
name="api_key"
|
||||
render={({ field }) => (
|
||||
<FormItem>
|
||||
<FormLabel>Firecrawl API Key (Optional)</FormLabel>
|
||||
<FormControl>
|
||||
<Input type="password" placeholder="fc-xxxxxxxxxxxxx" {...field} />
|
||||
</FormControl>
|
||||
<FormDescription>
|
||||
Add a Firecrawl API key for enhanced crawling. If not provided, will use
|
||||
AsyncChromiumLoader as fallback.
|
||||
</FormDescription>
|
||||
<FormMessage />
|
||||
</FormItem>
|
||||
)}
|
||||
/>
|
||||
|
||||
<FormField
|
||||
control={form.control}
|
||||
name="initial_urls"
|
||||
render={({ field }) => (
|
||||
<FormItem>
|
||||
<FormLabel>Initial URLs (Optional)</FormLabel>
|
||||
<FormControl>
|
||||
<Textarea
|
||||
placeholder="https://example.com https://docs.example.com https://blog.example.com"
|
||||
className="min-h-[100px] font-mono text-sm"
|
||||
{...field}
|
||||
/>
|
||||
</FormControl>
|
||||
<FormDescription>
|
||||
Enter URLs to crawl (one per line). You can add more URLs later.
|
||||
</FormDescription>
|
||||
<FormMessage />
|
||||
</FormItem>
|
||||
)}
|
||||
/>
|
||||
|
||||
<div className="space-y-2 pt-2">
|
||||
<div className="flex items-center space-x-2 text-sm text-muted-foreground">
|
||||
<Check className="h-4 w-4 text-green-500" />
|
||||
<span>Crawl any public web page</span>
|
||||
</div>
|
||||
<div className="flex items-center space-x-2 text-sm text-muted-foreground">
|
||||
<Check className="h-4 w-4 text-green-500" />
|
||||
<span>Extract markdown content automatically</span>
|
||||
</div>
|
||||
<div className="flex items-center space-x-2 text-sm text-muted-foreground">
|
||||
<Check className="h-4 w-4 text-green-500" />
|
||||
<span>Detect content changes and update documents</span>
|
||||
</div>
|
||||
<div className="flex items-center space-x-2 text-sm text-muted-foreground">
|
||||
<Check className="h-4 w-4 text-green-500" />
|
||||
<span>Works with or without Firecrawl API key</span>
|
||||
</div>
|
||||
</div>
|
||||
</CardContent>
|
||||
<CardFooter className="flex justify-between">
|
||||
<Button
|
||||
type="button"
|
||||
variant="outline"
|
||||
onClick={() => router.push(`/dashboard/${searchSpaceId}/connectors/add`)}
|
||||
>
|
||||
Cancel
|
||||
</Button>
|
||||
<Button type="submit" disabled={isSubmitting}>
|
||||
{isSubmitting ? (
|
||||
<>
|
||||
<Loader2 className="mr-2 h-4 w-4 animate-spin" />
|
||||
Setting up...
|
||||
</>
|
||||
) : (
|
||||
<>
|
||||
<Globe className="mr-2 h-4 w-4" />
|
||||
Create Crawler
|
||||
</>
|
||||
)}
|
||||
</Button>
|
||||
</CardFooter>
|
||||
</form>
|
||||
</Form>
|
||||
</Card>
|
||||
) : (
|
||||
/* Success Card */
|
||||
<Card>
|
||||
<CardHeader>
|
||||
<CardTitle>✅ Your web page crawler is successfully set up!</CardTitle>
|
||||
<CardDescription>
|
||||
You can now add URLs to crawl from the connector management page.
|
||||
</CardDescription>
|
||||
</CardHeader>
|
||||
</Card>
|
||||
)}
|
||||
|
||||
{/* Help Section */}
|
||||
{!doesConnectorExist && (
|
||||
<Card className="mt-6">
|
||||
<CardHeader>
|
||||
<CardTitle className="text-lg">How It Works</CardTitle>
|
||||
</CardHeader>
|
||||
<CardContent className="space-y-4">
|
||||
<div>
|
||||
<h4 className="font-medium mb-2">1. Choose Your Crawler Method</h4>
|
||||
<p className="text-sm text-muted-foreground">
|
||||
<strong>With Firecrawl (Recommended):</strong> Get your API key from{" "}
|
||||
<a
|
||||
href="https://firecrawl.dev"
|
||||
target="_blank"
|
||||
rel="noopener noreferrer"
|
||||
className="text-primary hover:underline"
|
||||
>
|
||||
firecrawl.dev
|
||||
</a>{" "}
|
||||
for faster, more reliable crawling with better content extraction.
|
||||
</p>
|
||||
<p className="text-sm text-muted-foreground mt-2">
|
||||
<strong>Without Firecrawl:</strong> The crawler will use AsyncChromiumLoader as a
|
||||
free fallback option. This works well for most websites but may be slower.
|
||||
</p>
|
||||
</div>
|
||||
<div>
|
||||
<h4 className="font-medium mb-2">2. Add URLs to Crawl (Optional)</h4>
|
||||
<p className="text-sm text-muted-foreground">
|
||||
You can add initial URLs now or add them later from the connector management page.
|
||||
Enter one URL per line.
|
||||
</p>
|
||||
</div>
|
||||
<div>
|
||||
<h4 className="font-medium mb-2">3. Manage Your Crawler</h4>
|
||||
<p className="text-sm text-muted-foreground">
|
||||
After setup, you can add more URLs, trigger manual crawls, or set up periodic
|
||||
indexing to keep your content up-to-date.
|
||||
</p>
|
||||
</div>
|
||||
</CardContent>
|
||||
</Card>
|
||||
)}
|
||||
</motion.div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
|
@ -1,201 +0,0 @@
|
|||
"use client";
|
||||
|
||||
import { type Tag, TagInput } from "emblor";
|
||||
import { Globe, Loader2 } from "lucide-react";
|
||||
import { useParams, useRouter } from "next/navigation";
|
||||
import { useTranslations } from "next-intl";
|
||||
import { useState } from "react";
|
||||
import { toast } from "sonner";
|
||||
import { Button } from "@/components/ui/button";
|
||||
import {
|
||||
Card,
|
||||
CardContent,
|
||||
CardDescription,
|
||||
CardFooter,
|
||||
CardHeader,
|
||||
CardTitle,
|
||||
} from "@/components/ui/card";
|
||||
import { Label } from "@/components/ui/label";
|
||||
|
||||
// URL validation regex
|
||||
const urlRegex = /^(https?:\/\/)?([\da-z.-]+)\.([a-z.]{2,6})([/\w .-]*)*\/?$/;
|
||||
|
||||
export default function WebpageCrawler() {
|
||||
const t = useTranslations("add_webpage");
|
||||
const params = useParams();
|
||||
const router = useRouter();
|
||||
const search_space_id = params.search_space_id as string;
|
||||
|
||||
const [urlTags, setUrlTags] = useState<Tag[]>([]);
|
||||
const [activeTagIndex, setActiveTagIndex] = useState<number | null>(null);
|
||||
const [isSubmitting, setIsSubmitting] = useState(false);
|
||||
const [error, setError] = useState<string | null>(null);
|
||||
|
||||
// Function to validate a URL
|
||||
const isValidUrl = (url: string): boolean => {
|
||||
return urlRegex.test(url);
|
||||
};
|
||||
|
||||
// Function to handle URL submission
|
||||
const handleSubmit = async () => {
|
||||
// Validate that we have at least one URL
|
||||
if (urlTags.length === 0) {
|
||||
setError(t("error_no_url"));
|
||||
return;
|
||||
}
|
||||
|
||||
// Validate all URLs
|
||||
const invalidUrls = urlTags.filter((tag) => !isValidUrl(tag.text));
|
||||
if (invalidUrls.length > 0) {
|
||||
setError(t("error_invalid_urls", { urls: invalidUrls.map((tag) => tag.text).join(", ") }));
|
||||
return;
|
||||
}
|
||||
|
||||
setError(null);
|
||||
setIsSubmitting(true);
|
||||
|
||||
try {
|
||||
toast(t("crawling_toast"), {
|
||||
description: t("crawling_toast_desc"),
|
||||
});
|
||||
|
||||
// Extract URLs from tags
|
||||
const urls = urlTags.map((tag) => tag.text);
|
||||
|
||||
// Make API call to backend
|
||||
const response = await fetch(
|
||||
`${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/documents`,
|
||||
{
|
||||
method: "POST",
|
||||
headers: {
|
||||
"Content-Type": "application/json",
|
||||
Authorization: `Bearer ${localStorage.getItem("surfsense_bearer_token")}`,
|
||||
},
|
||||
body: JSON.stringify({
|
||||
document_type: "CRAWLED_URL",
|
||||
content: urls,
|
||||
search_space_id: parseInt(search_space_id),
|
||||
}),
|
||||
}
|
||||
);
|
||||
|
||||
if (!response.ok) {
|
||||
throw new Error("Failed to crawl URLs");
|
||||
}
|
||||
|
||||
await response.json();
|
||||
|
||||
toast(t("success_toast"), {
|
||||
description: t("success_toast_desc"),
|
||||
});
|
||||
|
||||
// Redirect to documents page
|
||||
router.push(`/dashboard/${search_space_id}/documents`);
|
||||
} catch (error: any) {
|
||||
setError(error.message || t("error_generic"));
|
||||
toast(t("error_toast"), {
|
||||
description: `${t("error_toast_desc")}: ${error.message}`,
|
||||
});
|
||||
} finally {
|
||||
setIsSubmitting(false);
|
||||
}
|
||||
};
|
||||
|
||||
// Function to add a new URL tag
|
||||
const handleAddTag = (text: string) => {
|
||||
// Basic URL validation
|
||||
if (!isValidUrl(text)) {
|
||||
toast(t("invalid_url_toast"), {
|
||||
description: t("invalid_url_toast_desc"),
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
// Check for duplicates
|
||||
if (urlTags.some((tag) => tag.text === text)) {
|
||||
toast(t("duplicate_url_toast"), {
|
||||
description: t("duplicate_url_toast_desc"),
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
// Add the new tag
|
||||
const newTag: Tag = {
|
||||
id: Date.now().toString(),
|
||||
text: text,
|
||||
};
|
||||
|
||||
setUrlTags([...urlTags, newTag]);
|
||||
};
|
||||
|
||||
return (
|
||||
<div className="container mx-auto py-8">
|
||||
<Card className="max-w-2xl mx-auto">
|
||||
<CardHeader>
|
||||
<CardTitle className="flex items-center gap-2">
|
||||
<Globe className="h-5 w-5" />
|
||||
{t("title")}
|
||||
</CardTitle>
|
||||
<CardDescription>{t("subtitle")}</CardDescription>
|
||||
</CardHeader>
|
||||
<CardContent>
|
||||
<div className="space-y-4">
|
||||
<div className="space-y-2">
|
||||
<Label htmlFor="url-input">{t("label")}</Label>
|
||||
<TagInput
|
||||
id="url-input"
|
||||
tags={urlTags}
|
||||
setTags={setUrlTags}
|
||||
placeholder={t("placeholder")}
|
||||
onAddTag={handleAddTag}
|
||||
styleClasses={{
|
||||
inlineTagsContainer:
|
||||
"border-input rounded-lg bg-background shadow-sm shadow-black/5 transition-shadow focus-within:border-ring focus-within:outline-none focus-within:ring-[3px] focus-within:ring-ring/20 p-1 gap-1",
|
||||
input: "w-full min-w-[80px] focus-visible:outline-none shadow-none px-2 h-7",
|
||||
tag: {
|
||||
body: "h-7 relative bg-background border border-input hover:bg-background rounded-md font-medium text-xs ps-2 pe-7 flex",
|
||||
closeButton:
|
||||
"absolute -inset-y-px -end-px p-0 rounded-e-lg flex size-7 transition-colors outline-0 focus-visible:outline focus-visible:outline-2 focus-visible:outline-ring/70 text-muted-foreground/80 hover:text-foreground",
|
||||
},
|
||||
}}
|
||||
activeTagIndex={activeTagIndex}
|
||||
setActiveTagIndex={setActiveTagIndex}
|
||||
/>
|
||||
<p className="text-xs text-muted-foreground mt-1">{t("hint")}</p>
|
||||
</div>
|
||||
|
||||
{error && <div className="text-sm text-red-500 mt-2">{error}</div>}
|
||||
|
||||
<div className="bg-muted/50 rounded-lg p-4 text-sm">
|
||||
<h4 className="font-medium mb-2">{t("tips_title")}</h4>
|
||||
<ul className="list-disc pl-5 space-y-1 text-muted-foreground">
|
||||
<li>{t("tip_1")}</li>
|
||||
<li>{t("tip_2")}</li>
|
||||
<li>{t("tip_3")}</li>
|
||||
<li>{t("tip_4")}</li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
</CardContent>
|
||||
<CardFooter className="flex justify-between">
|
||||
<Button
|
||||
variant="outline"
|
||||
onClick={() => router.push(`/dashboard/${search_space_id}/documents`)}
|
||||
>
|
||||
{t("cancel")}
|
||||
</Button>
|
||||
<Button onClick={handleSubmit} disabled={isSubmitting || urlTags.length === 0}>
|
||||
{isSubmitting ? (
|
||||
<>
|
||||
<Loader2 className="mr-2 h-4 w-4 animate-spin" />
|
||||
{t("submitting")}
|
||||
</>
|
||||
) : (
|
||||
t("submit")
|
||||
)}
|
||||
</Button>
|
||||
</CardFooter>
|
||||
</Card>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
|
@ -52,6 +52,12 @@ export default function DashboardLayout({
|
|||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
title: "Team",
|
||||
url: `/dashboard/${search_space_id}/team`,
|
||||
icon: "Users",
|
||||
items: [],
|
||||
},
|
||||
{
|
||||
title: "Settings",
|
||||
url: `/dashboard/${search_space_id}/settings`,
|
||||
|
|
|
|||
|
|
@ -1126,7 +1126,7 @@ function LogRowActions({ row, t }: { row: Row<Log>; t: (key: string) => string }
|
|||
setIsDeleting(true);
|
||||
try {
|
||||
await deleteLog(log.id);
|
||||
toast.success(t("log_deleted_success"));
|
||||
// toast.success(t("log_deleted_success"));
|
||||
await refreshLogs();
|
||||
} catch (error) {
|
||||
console.error("Error deleting log:", error);
|
||||
|
|
|
|||
|
|
@ -1,9 +1,9 @@
|
|||
"use client";
|
||||
|
||||
import { IconBrandYoutube } from "@tabler/icons-react";
|
||||
import { Cable, Database, Upload } from "lucide-react";
|
||||
import { Cable, Database, Globe, Upload } from "lucide-react";
|
||||
import { motion } from "motion/react";
|
||||
import { useParams, useSearchParams } from "next/navigation";
|
||||
import { useParams, useRouter, useSearchParams } from "next/navigation";
|
||||
import { useEffect, useState } from "react";
|
||||
import { ConnectorsTab } from "@/components/sources/ConnectorsTab";
|
||||
import { DocumentUploadTab } from "@/components/sources/DocumentUploadTab";
|
||||
|
|
@ -12,6 +12,7 @@ import { Tabs, TabsContent, TabsList, TabsTrigger } from "@/components/ui/tabs";
|
|||
|
||||
export default function AddSourcesPage() {
|
||||
const params = useParams();
|
||||
const router = useRouter();
|
||||
const searchParams = useSearchParams();
|
||||
const search_space_id = params.search_space_id as string;
|
||||
const [activeTab, setActiveTab] = useState("documents");
|
||||
|
|
@ -24,6 +25,14 @@ export default function AddSourcesPage() {
|
|||
}
|
||||
}, [searchParams]);
|
||||
|
||||
const handleTabChange = (value: string) => {
|
||||
if (value === "webpages") {
|
||||
router.push(`/dashboard/${search_space_id}/connectors/add/webcrawler-connector`);
|
||||
} else {
|
||||
setActiveTab(value);
|
||||
}
|
||||
};
|
||||
|
||||
return (
|
||||
<div className="container mx-auto py-8 px-4">
|
||||
<motion.div
|
||||
|
|
@ -42,19 +51,26 @@ export default function AddSourcesPage() {
|
|||
</div>
|
||||
|
||||
{/* Tabs */}
|
||||
<Tabs value={activeTab} onValueChange={setActiveTab} className="w-full">
|
||||
<TabsList className="grid w-full max-w-2xl mx-auto grid-cols-3 h-12">
|
||||
<Tabs value={activeTab} onValueChange={handleTabChange} className="w-full">
|
||||
<TabsList className="grid w-full max-w-3xl mx-auto grid-cols-4 h-12">
|
||||
<TabsTrigger value="documents" className="flex items-center gap-2">
|
||||
<Upload className="h-4 w-4" />
|
||||
Documents
|
||||
<span className="hidden sm:inline">Documents</span>
|
||||
<span className="sm:hidden">Docs</span>
|
||||
</TabsTrigger>
|
||||
<TabsTrigger value="youtube" className="flex items-center gap-2">
|
||||
<IconBrandYoutube className="h-4 w-4" />
|
||||
YouTube
|
||||
</TabsTrigger>
|
||||
<TabsTrigger value="webpages" className="flex items-center gap-2">
|
||||
<Globe className="h-4 w-4" />
|
||||
<span className="hidden sm:inline">Web Pages</span>
|
||||
<span className="sm:hidden">Web</span>
|
||||
</TabsTrigger>
|
||||
<TabsTrigger value="connectors" className="flex items-center gap-2">
|
||||
<Cable className="h-4 w-4" />
|
||||
Connectors
|
||||
<span className="hidden sm:inline">Connectors</span>
|
||||
<span className="sm:hidden">More</span>
|
||||
</TabsTrigger>
|
||||
</TabsList>
|
||||
|
||||
|
|
|
|||
1325
surfsense_web/app/dashboard/[search_space_id]/team/page.tsx
Normal file
1325
surfsense_web/app/dashboard/[search_space_id]/team/page.tsx
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -1,6 +1,6 @@
|
|||
"use client";
|
||||
|
||||
import { AlertCircle, Loader2, Plus, Search, Trash2 } from "lucide-react";
|
||||
import { AlertCircle, Loader2, Plus, Search, Trash2, UserCheck, Users } from "lucide-react";
|
||||
import { motion, type Variants } from "motion/react";
|
||||
import Image from "next/image";
|
||||
import Link from "next/link";
|
||||
|
|
@ -22,6 +22,7 @@ import {
|
|||
AlertDialogTitle,
|
||||
AlertDialogTrigger,
|
||||
} from "@/components/ui/alert-dialog";
|
||||
import { Badge } from "@/components/ui/badge";
|
||||
import { Button } from "@/components/ui/button";
|
||||
import {
|
||||
Card,
|
||||
|
|
@ -308,16 +309,30 @@ const DashboardPage = () => {
|
|||
>
|
||||
<div className="flex flex-1 flex-col justify-between p-1">
|
||||
<div>
|
||||
<h3 className="font-medium text-lg">{space.name}</h3>
|
||||
<div className="flex items-center gap-2">
|
||||
<h3 className="font-medium text-lg">{space.name}</h3>
|
||||
{!space.is_owner && (
|
||||
<Badge variant="secondary" className="text-xs font-normal">
|
||||
{t("shared")}
|
||||
</Badge>
|
||||
)}
|
||||
</div>
|
||||
<p className="mt-1 text-sm text-muted-foreground">
|
||||
{space.description}
|
||||
</p>
|
||||
</div>
|
||||
<div className="mt-4 text-xs text-muted-foreground">
|
||||
{/* <span>{space.title}</span> */}
|
||||
<div className="mt-4 flex items-center justify-between text-xs text-muted-foreground">
|
||||
<span>
|
||||
{t("created")} {formatDate(space.created_at)}
|
||||
</span>
|
||||
<div className="flex items-center gap-1">
|
||||
{space.is_owner ? (
|
||||
<UserCheck className="h-3.5 w-3.5" />
|
||||
) : (
|
||||
<Users className="h-3.5 w-3.5" />
|
||||
)}
|
||||
<span>{space.member_count}</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</Link>
|
||||
|
|
|
|||
336
surfsense_web/app/invite/[invite_code]/page.tsx
Normal file
336
surfsense_web/app/invite/[invite_code]/page.tsx
Normal file
|
|
@ -0,0 +1,336 @@
|
|||
"use client";
|
||||
|
||||
import {
|
||||
AlertCircle,
|
||||
ArrowRight,
|
||||
CheckCircle2,
|
||||
Clock,
|
||||
Loader2,
|
||||
LogIn,
|
||||
Shield,
|
||||
Sparkles,
|
||||
Users,
|
||||
XCircle,
|
||||
} from "lucide-react";
|
||||
import { motion } from "motion/react";
|
||||
import Image from "next/image";
|
||||
import Link from "next/link";
|
||||
import { useParams, useRouter } from "next/navigation";
|
||||
import { use, useEffect, useState } from "react";
|
||||
import { Button } from "@/components/ui/button";
|
||||
import {
|
||||
Card,
|
||||
CardContent,
|
||||
CardDescription,
|
||||
CardFooter,
|
||||
CardHeader,
|
||||
CardTitle,
|
||||
} from "@/components/ui/card";
|
||||
import { useInviteInfo } from "@/hooks/use-rbac";
|
||||
|
||||
export default function InviteAcceptPage() {
|
||||
const params = useParams();
|
||||
const router = useRouter();
|
||||
const inviteCode = params.invite_code as string;
|
||||
|
||||
const { inviteInfo, loading, acceptInvite } = useInviteInfo(inviteCode);
|
||||
const [accepting, setAccepting] = useState(false);
|
||||
const [accepted, setAccepted] = useState(false);
|
||||
const [acceptedData, setAcceptedData] = useState<{
|
||||
search_space_id: number;
|
||||
search_space_name: string;
|
||||
role_name: string;
|
||||
} | null>(null);
|
||||
const [error, setError] = useState<string | null>(null);
|
||||
const [isLoggedIn, setIsLoggedIn] = useState<boolean | null>(null);
|
||||
|
||||
// Check if user is logged in
|
||||
useEffect(() => {
|
||||
if (typeof window !== "undefined") {
|
||||
const token = localStorage.getItem("surfsense_bearer_token");
|
||||
setIsLoggedIn(!!token);
|
||||
}
|
||||
}, []);
|
||||
|
||||
const handleAccept = async () => {
|
||||
setAccepting(true);
|
||||
setError(null);
|
||||
try {
|
||||
const result = await acceptInvite();
|
||||
if (result) {
|
||||
setAccepted(true);
|
||||
setAcceptedData(result);
|
||||
}
|
||||
} catch (err: any) {
|
||||
setError(err.message || "Failed to accept invite");
|
||||
} finally {
|
||||
setAccepting(false);
|
||||
}
|
||||
};
|
||||
|
||||
const handleLoginRedirect = () => {
|
||||
// Store the invite code to redirect back after login
|
||||
localStorage.setItem("pending_invite_code", inviteCode);
|
||||
router.push("/auth");
|
||||
};
|
||||
|
||||
// Check for pending invite after login
|
||||
useEffect(() => {
|
||||
if (isLoggedIn && typeof window !== "undefined") {
|
||||
const pendingInvite = localStorage.getItem("pending_invite_code");
|
||||
if (pendingInvite === inviteCode) {
|
||||
localStorage.removeItem("pending_invite_code");
|
||||
// Auto-accept the invite after redirect
|
||||
handleAccept();
|
||||
}
|
||||
}
|
||||
}, [isLoggedIn, inviteCode]);
|
||||
|
||||
return (
|
||||
<div className="min-h-screen flex items-center justify-center p-4 bg-gradient-to-br from-background via-background to-primary/5">
|
||||
{/* Background decoration */}
|
||||
<div className="absolute inset-0 overflow-hidden pointer-events-none">
|
||||
<div className="absolute -top-1/2 -right-1/2 w-full h-full bg-gradient-to-bl from-primary/10 via-transparent to-transparent rounded-full blur-3xl" />
|
||||
<div className="absolute -bottom-1/2 -left-1/2 w-full h-full bg-gradient-to-tr from-violet-500/10 via-transparent to-transparent rounded-full blur-3xl" />
|
||||
</div>
|
||||
|
||||
<motion.div
|
||||
initial={{ opacity: 0, y: 20, scale: 0.95 }}
|
||||
animate={{ opacity: 1, y: 0, scale: 1 }}
|
||||
transition={{ duration: 0.5, ease: "easeOut" }}
|
||||
className="w-full max-w-md relative z-10"
|
||||
>
|
||||
<Card className="border-none shadow-2xl bg-card/80 backdrop-blur-xl">
|
||||
{loading || isLoggedIn === null ? (
|
||||
<CardContent className="flex flex-col items-center justify-center py-16">
|
||||
<motion.div
|
||||
animate={{ rotate: 360 }}
|
||||
transition={{ duration: 1, repeat: Infinity, ease: "linear" }}
|
||||
>
|
||||
<Loader2 className="h-12 w-12 text-primary" />
|
||||
</motion.div>
|
||||
<p className="mt-4 text-muted-foreground">Loading invite details...</p>
|
||||
</CardContent>
|
||||
) : accepted && acceptedData ? (
|
||||
<>
|
||||
<CardHeader className="text-center pb-4">
|
||||
<motion.div
|
||||
initial={{ scale: 0 }}
|
||||
animate={{ scale: 1 }}
|
||||
transition={{ type: "spring", stiffness: 200, damping: 15 }}
|
||||
className="mx-auto mb-4 h-20 w-20 rounded-full bg-gradient-to-br from-emerald-500/20 to-emerald-500/5 flex items-center justify-center ring-4 ring-emerald-500/20"
|
||||
>
|
||||
<CheckCircle2 className="h-10 w-10 text-emerald-500" />
|
||||
</motion.div>
|
||||
<CardTitle className="text-2xl">Welcome to the team!</CardTitle>
|
||||
<CardDescription>
|
||||
You've successfully joined {acceptedData.search_space_name}
|
||||
</CardDescription>
|
||||
</CardHeader>
|
||||
<CardContent className="space-y-4">
|
||||
<div className="bg-muted/50 rounded-lg p-4 space-y-3">
|
||||
<div className="flex items-center gap-3">
|
||||
<div className="h-10 w-10 rounded-lg bg-primary/10 flex items-center justify-center">
|
||||
<Users className="h-5 w-5 text-primary" />
|
||||
</div>
|
||||
<div>
|
||||
<p className="font-medium">{acceptedData.search_space_name}</p>
|
||||
<p className="text-sm text-muted-foreground">Search Space</p>
|
||||
</div>
|
||||
</div>
|
||||
<div className="flex items-center gap-3">
|
||||
<div className="h-10 w-10 rounded-lg bg-violet-500/10 flex items-center justify-center">
|
||||
<Shield className="h-5 w-5 text-violet-500" />
|
||||
</div>
|
||||
<div>
|
||||
<p className="font-medium">{acceptedData.role_name}</p>
|
||||
<p className="text-sm text-muted-foreground">Your Role</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</CardContent>
|
||||
<CardFooter>
|
||||
<Button
|
||||
className="w-full gap-2"
|
||||
onClick={() => router.push(`/dashboard/${acceptedData.search_space_id}`)}
|
||||
>
|
||||
Go to Search Space
|
||||
<ArrowRight className="h-4 w-4" />
|
||||
</Button>
|
||||
</CardFooter>
|
||||
</>
|
||||
) : !inviteInfo?.is_valid ? (
|
||||
<>
|
||||
<CardHeader className="text-center pb-4">
|
||||
<motion.div
|
||||
initial={{ scale: 0 }}
|
||||
animate={{ scale: 1 }}
|
||||
transition={{ type: "spring", stiffness: 200, damping: 15 }}
|
||||
className="mx-auto mb-4 h-20 w-20 rounded-full bg-gradient-to-br from-destructive/20 to-destructive/5 flex items-center justify-center ring-4 ring-destructive/20"
|
||||
>
|
||||
<XCircle className="h-10 w-10 text-destructive" />
|
||||
</motion.div>
|
||||
<CardTitle className="text-2xl">Invalid Invite</CardTitle>
|
||||
<CardDescription>
|
||||
{inviteInfo?.message || "This invite link is no longer valid"}
|
||||
</CardDescription>
|
||||
</CardHeader>
|
||||
<CardContent className="text-center">
|
||||
<p className="text-sm text-muted-foreground">
|
||||
The invite may have expired, reached its maximum uses, or been revoked by the
|
||||
owner.
|
||||
</p>
|
||||
</CardContent>
|
||||
<CardFooter>
|
||||
<Button
|
||||
variant="outline"
|
||||
className="w-full"
|
||||
onClick={() => router.push("/dashboard")}
|
||||
>
|
||||
Go to Dashboard
|
||||
</Button>
|
||||
</CardFooter>
|
||||
</>
|
||||
) : !isLoggedIn ? (
|
||||
<>
|
||||
<CardHeader className="text-center pb-4">
|
||||
<motion.div
|
||||
initial={{ scale: 0 }}
|
||||
animate={{ scale: 1 }}
|
||||
transition={{ type: "spring", stiffness: 200, damping: 15 }}
|
||||
className="mx-auto mb-4 h-20 w-20 rounded-full bg-gradient-to-br from-primary/20 to-primary/5 flex items-center justify-center ring-4 ring-primary/20"
|
||||
>
|
||||
<Sparkles className="h-10 w-10 text-primary" />
|
||||
</motion.div>
|
||||
<CardTitle className="text-2xl">You're Invited!</CardTitle>
|
||||
<CardDescription>
|
||||
Sign in to join {inviteInfo?.search_space_name || "this search space"}
|
||||
</CardDescription>
|
||||
</CardHeader>
|
||||
<CardContent className="space-y-4">
|
||||
<div className="bg-muted/50 rounded-lg p-4 space-y-3">
|
||||
<div className="flex items-center gap-3">
|
||||
<div className="h-10 w-10 rounded-lg bg-primary/10 flex items-center justify-center">
|
||||
<Users className="h-5 w-5 text-primary" />
|
||||
</div>
|
||||
<div>
|
||||
<p className="font-medium">{inviteInfo?.search_space_name}</p>
|
||||
<p className="text-sm text-muted-foreground">Search Space</p>
|
||||
</div>
|
||||
</div>
|
||||
{inviteInfo?.role_name && (
|
||||
<div className="flex items-center gap-3">
|
||||
<div className="h-10 w-10 rounded-lg bg-violet-500/10 flex items-center justify-center">
|
||||
<Shield className="h-5 w-5 text-violet-500" />
|
||||
</div>
|
||||
<div>
|
||||
<p className="font-medium">{inviteInfo.role_name}</p>
|
||||
<p className="text-sm text-muted-foreground">Role you'll receive</p>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</CardContent>
|
||||
<CardFooter>
|
||||
<Button className="w-full gap-2" onClick={handleLoginRedirect}>
|
||||
<LogIn className="h-4 w-4" />
|
||||
Sign in to Accept
|
||||
</Button>
|
||||
</CardFooter>
|
||||
</>
|
||||
) : (
|
||||
<>
|
||||
<CardHeader className="text-center pb-4">
|
||||
<motion.div
|
||||
initial={{ scale: 0 }}
|
||||
animate={{ scale: 1 }}
|
||||
transition={{ type: "spring", stiffness: 200, damping: 15 }}
|
||||
className="mx-auto mb-4 h-20 w-20 rounded-full bg-gradient-to-br from-primary/20 to-primary/5 flex items-center justify-center ring-4 ring-primary/20"
|
||||
>
|
||||
<Sparkles className="h-10 w-10 text-primary" />
|
||||
</motion.div>
|
||||
<CardTitle className="text-2xl">You're Invited!</CardTitle>
|
||||
<CardDescription>
|
||||
Accept this invite to join {inviteInfo?.search_space_name || "this search space"}
|
||||
</CardDescription>
|
||||
</CardHeader>
|
||||
<CardContent className="space-y-4">
|
||||
<div className="bg-muted/50 rounded-lg p-4 space-y-3">
|
||||
<div className="flex items-center gap-3">
|
||||
<div className="h-10 w-10 rounded-lg bg-primary/10 flex items-center justify-center">
|
||||
<Users className="h-5 w-5 text-primary" />
|
||||
</div>
|
||||
<div>
|
||||
<p className="font-medium">{inviteInfo?.search_space_name}</p>
|
||||
<p className="text-sm text-muted-foreground">Search Space</p>
|
||||
</div>
|
||||
</div>
|
||||
{inviteInfo?.role_name && (
|
||||
<div className="flex items-center gap-3">
|
||||
<div className="h-10 w-10 rounded-lg bg-violet-500/10 flex items-center justify-center">
|
||||
<Shield className="h-5 w-5 text-violet-500" />
|
||||
</div>
|
||||
<div>
|
||||
<p className="font-medium">{inviteInfo.role_name}</p>
|
||||
<p className="text-sm text-muted-foreground">Role you'll receive</p>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{error && (
|
||||
<motion.div
|
||||
initial={{ opacity: 0, y: -10 }}
|
||||
animate={{ opacity: 1, y: 0 }}
|
||||
className="flex items-center gap-2 p-3 bg-destructive/10 text-destructive rounded-lg text-sm"
|
||||
>
|
||||
<AlertCircle className="h-4 w-4 shrink-0" />
|
||||
{error}
|
||||
</motion.div>
|
||||
)}
|
||||
</CardContent>
|
||||
<CardFooter className="flex gap-2">
|
||||
<Button
|
||||
variant="outline"
|
||||
className="flex-1"
|
||||
onClick={() => router.push("/dashboard")}
|
||||
>
|
||||
Cancel
|
||||
</Button>
|
||||
<Button className="flex-1 gap-2" onClick={handleAccept} disabled={accepting}>
|
||||
{accepting ? (
|
||||
<>
|
||||
<Loader2 className="h-4 w-4 animate-spin" />
|
||||
Accepting...
|
||||
</>
|
||||
) : (
|
||||
<>
|
||||
<CheckCircle2 className="h-4 w-4" />
|
||||
Accept Invite
|
||||
</>
|
||||
)}
|
||||
</Button>
|
||||
</CardFooter>
|
||||
</>
|
||||
)}
|
||||
</Card>
|
||||
|
||||
{/* Branding */}
|
||||
<motion.div
|
||||
initial={{ opacity: 0 }}
|
||||
animate={{ opacity: 1 }}
|
||||
transition={{ delay: 0.3 }}
|
||||
className="mt-6 text-center"
|
||||
>
|
||||
<Link
|
||||
href="/"
|
||||
className="inline-flex items-center gap-2 text-muted-foreground hover:text-foreground transition-colors"
|
||||
>
|
||||
<Image src="/icon-128.png" alt="SurfSense" width={24} height={24} className="rounded" />
|
||||
<span className="text-sm font-medium">SurfSense</span>
|
||||
</Link>
|
||||
</motion.div>
|
||||
</motion.div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
|
@ -1,8 +1,8 @@
|
|||
"use client";
|
||||
|
||||
import { ChatInput } from "@llamaindex/chat-ui";
|
||||
import { Brain, Check, FolderOpen, Minus, Plus, Zap } from "lucide-react";
|
||||
import { useParams } from "next/navigation";
|
||||
import { Brain, Check, FolderOpen, Minus, Plus, PlusCircle, Zap } from "lucide-react";
|
||||
import { useParams, useRouter } from "next/navigation";
|
||||
import React, { Suspense, useCallback, useState } from "react";
|
||||
import { DocumentsDataTable } from "@/components/chat/DocumentsDataTable";
|
||||
import { Badge } from "@/components/ui/badge";
|
||||
|
|
@ -115,6 +115,7 @@ const ConnectorSelector = React.memo(
|
|||
selectedConnectors?: string[];
|
||||
}) => {
|
||||
const { search_space_id } = useParams();
|
||||
const router = useRouter();
|
||||
const [isOpen, setIsOpen] = useState(false);
|
||||
|
||||
// Fetch immediately (not lazy) so the button can show the correct count
|
||||
|
|
@ -247,9 +248,19 @@ const ConnectorSelector = React.memo(
|
|||
<Brain className="h-8 w-8 text-muted-foreground" />
|
||||
</div>
|
||||
<h4 className="text-sm font-medium mb-1">No sources found</h4>
|
||||
<p className="text-xs text-muted-foreground max-w-xs">
|
||||
<p className="text-xs text-muted-foreground max-w-xs mb-4">
|
||||
Add documents or configure search connectors for this search space
|
||||
</p>
|
||||
<Button
|
||||
onClick={() => {
|
||||
setIsOpen(false);
|
||||
router.push(`/dashboard/${search_space_id}/sources/add`);
|
||||
}}
|
||||
className="gap-2"
|
||||
>
|
||||
<PlusCircle className="h-4 w-4" />
|
||||
Add Sources
|
||||
</Button>
|
||||
</div>
|
||||
) : (
|
||||
<>
|
||||
|
|
|
|||
|
|
@ -188,6 +188,7 @@ export function DashboardBreadcrumb() {
|
|||
"linkup-api": "LinkUp API",
|
||||
"luma-connector": "Luma",
|
||||
"elasticsearch-connector": "Elasticsearch",
|
||||
"webcrawler-connector": "Web Pages",
|
||||
};
|
||||
|
||||
const connectorLabel = connectorLabels[connectorType] || connectorType;
|
||||
|
|
|
|||
|
|
@ -52,5 +52,7 @@ export const editConnectorSchema = z.object({
|
|||
GOOGLE_CALENDAR_CALENDAR_IDS: z.string().optional(),
|
||||
LUMA_API_KEY: z.string().optional(),
|
||||
ELASTICSEARCH_API_KEY: z.string().optional(),
|
||||
FIRECRAWL_API_KEY: z.string().optional(),
|
||||
INITIAL_URLS: z.string().optional(),
|
||||
});
|
||||
export type EditConnectorFormValues = z.infer<typeof editConnectorSchema>;
|
||||
|
|
|
|||
|
|
@ -29,6 +29,7 @@ const INTEGRATIONS: Integration[] = [
|
|||
// Documentation & Knowledge
|
||||
{ name: "Confluence", icon: "https://cdn.simpleicons.org/confluence/172B4D" },
|
||||
{ name: "Notion", icon: "https://cdn.simpleicons.org/notion/000000/ffffff" },
|
||||
{ name: "Web Pages", icon: "https://cdn.jsdelivr.net/npm/lucide-static@0.294.0/icons/globe.svg" },
|
||||
|
||||
// Cloud Storage
|
||||
{ name: "Google Drive", icon: "https://cdn.simpleicons.org/googledrive/4285F4" },
|
||||
|
|
|
|||
|
|
@ -8,6 +8,8 @@ import {
|
|||
FileText,
|
||||
MessageSquare,
|
||||
Sparkles,
|
||||
UserPlus,
|
||||
Users,
|
||||
Zap,
|
||||
} from "lucide-react";
|
||||
import { motion } from "motion/react";
|
||||
|
|
@ -50,16 +52,60 @@ export function CompletionStep({ searchSpaceId }: CompletionStepProps) {
|
|||
<p className="text-muted-foreground">Choose an option to continue</p>
|
||||
</div>
|
||||
|
||||
<div className="grid grid-cols-1 md:grid-cols-2 gap-6">
|
||||
<div className="grid grid-cols-1 md:grid-cols-3 gap-6">
|
||||
{/* Manage Team Card */}
|
||||
<motion.div
|
||||
initial={{ opacity: 0, y: 20 }}
|
||||
animate={{ opacity: 1, y: 0 }}
|
||||
transition={{ delay: 0.9, type: "spring", stiffness: 300, damping: 25 }}
|
||||
>
|
||||
<Card className="h-full border-2 hover:border-emerald-500/50 transition-all duration-300 hover:shadow-xl hover:shadow-emerald-500/10 cursor-pointer group relative overflow-hidden">
|
||||
<div className="absolute top-0 right-0 w-32 h-32 bg-gradient-to-br from-emerald-500/10 to-transparent rounded-full blur-2xl -mr-16 -mt-16 group-hover:scale-150 transition-transform duration-500" />
|
||||
<CardHeader className="relative">
|
||||
<div className="w-12 h-12 bg-gradient-to-br from-emerald-500/20 to-emerald-600/10 rounded-xl flex items-center justify-center mb-3 group-hover:scale-110 group-hover:rotate-3 transition-all duration-300 ring-1 ring-emerald-500/20">
|
||||
<Users className="w-6 h-6 text-emerald-600 dark:text-emerald-400" />
|
||||
</div>
|
||||
<CardTitle className="text-lg">Manage Team</CardTitle>
|
||||
<CardDescription>
|
||||
Invite team members and collaborate on your search space
|
||||
</CardDescription>
|
||||
</CardHeader>
|
||||
<CardContent className="space-y-4 relative">
|
||||
<div className="space-y-2 text-sm text-muted-foreground">
|
||||
<div className="flex items-center gap-2">
|
||||
<UserPlus className="w-4 h-4 text-emerald-500" />
|
||||
<span>Invite team members</span>
|
||||
</div>
|
||||
<div className="flex items-center gap-2">
|
||||
<CheckCircle className="w-4 h-4 text-emerald-500" />
|
||||
<span>Assign roles & permissions</span>
|
||||
</div>
|
||||
<div className="flex items-center gap-2">
|
||||
<CheckCircle className="w-4 h-4 text-emerald-500" />
|
||||
<span>Collaborate together</span>
|
||||
</div>
|
||||
</div>
|
||||
<Button
|
||||
className="w-full bg-emerald-600 hover:bg-emerald-700 text-white group-hover:shadow-lg group-hover:shadow-emerald-500/25 transition-all duration-300"
|
||||
onClick={() => router.push(`/dashboard/${searchSpaceId}/team`)}
|
||||
>
|
||||
Manage Team
|
||||
<ArrowRight className="w-4 h-4 ml-2 group-hover:translate-x-1 transition-transform" />
|
||||
</Button>
|
||||
</CardContent>
|
||||
</Card>
|
||||
</motion.div>
|
||||
|
||||
{/* Add Sources Card */}
|
||||
<motion.div
|
||||
initial={{ opacity: 0, x: -20 }}
|
||||
animate={{ opacity: 1, x: 0 }}
|
||||
transition={{ delay: 0.7 }}
|
||||
initial={{ opacity: 0, y: 20 }}
|
||||
animate={{ opacity: 1, y: 0 }}
|
||||
transition={{ delay: 0.7, type: "spring", stiffness: 300, damping: 25 }}
|
||||
>
|
||||
<Card className="h-full border-2 hover:border-primary/50 transition-all hover:shadow-lg cursor-pointer group">
|
||||
<CardHeader>
|
||||
<div className="w-12 h-12 bg-blue-100 dark:bg-blue-950 rounded-lg flex items-center justify-center mb-3 group-hover:scale-110 transition-transform">
|
||||
<Card className="h-full border-2 hover:border-blue-500/50 transition-all duration-300 hover:shadow-xl hover:shadow-blue-500/10 cursor-pointer group relative overflow-hidden">
|
||||
<div className="absolute top-0 right-0 w-32 h-32 bg-gradient-to-br from-blue-500/10 to-transparent rounded-full blur-2xl -mr-16 -mt-16 group-hover:scale-150 transition-transform duration-500" />
|
||||
<CardHeader className="relative">
|
||||
<div className="w-12 h-12 bg-gradient-to-br from-blue-500/20 to-blue-600/10 rounded-xl flex items-center justify-center mb-3 group-hover:scale-110 group-hover:rotate-3 transition-all duration-300 ring-1 ring-blue-500/20">
|
||||
<FileText className="w-6 h-6 text-blue-600 dark:text-blue-400" />
|
||||
</div>
|
||||
<CardTitle className="text-lg">Add Sources</CardTitle>
|
||||
|
|
@ -67,27 +113,27 @@ export function CompletionStep({ searchSpaceId }: CompletionStepProps) {
|
|||
Connect your data sources to start building your knowledge base
|
||||
</CardDescription>
|
||||
</CardHeader>
|
||||
<CardContent className="space-y-4">
|
||||
<CardContent className="space-y-4 relative">
|
||||
<div className="space-y-2 text-sm text-muted-foreground">
|
||||
<div className="flex items-center gap-2">
|
||||
<CheckCircle className="w-4 h-4 text-green-600" />
|
||||
<CheckCircle className="w-4 h-4 text-emerald-500" />
|
||||
<span>Connect documents and files</span>
|
||||
</div>
|
||||
<div className="flex items-center gap-2">
|
||||
<CheckCircle className="w-4 h-4 text-green-600" />
|
||||
<CheckCircle className="w-4 h-4 text-emerald-500" />
|
||||
<span>Import from various sources</span>
|
||||
</div>
|
||||
<div className="flex items-center gap-2">
|
||||
<CheckCircle className="w-4 h-4 text-green-600" />
|
||||
<CheckCircle className="w-4 h-4 text-emerald-500" />
|
||||
<span>Build your knowledge base</span>
|
||||
</div>
|
||||
</div>
|
||||
<Button
|
||||
className="w-full group-hover:bg-primary/90"
|
||||
className="w-full bg-blue-600 hover:bg-blue-700 text-white group-hover:shadow-lg group-hover:shadow-blue-500/25 transition-all duration-300"
|
||||
onClick={() => router.push(`/dashboard/${searchSpaceId}/sources/add`)}
|
||||
>
|
||||
Add Sources
|
||||
<ArrowRight className="w-4 h-4 ml-2" />
|
||||
<ArrowRight className="w-4 h-4 ml-2 group-hover:translate-x-1 transition-transform" />
|
||||
</Button>
|
||||
</CardContent>
|
||||
</Card>
|
||||
|
|
@ -95,13 +141,14 @@ export function CompletionStep({ searchSpaceId }: CompletionStepProps) {
|
|||
|
||||
{/* Start Chatting Card */}
|
||||
<motion.div
|
||||
initial={{ opacity: 0, x: 20 }}
|
||||
animate={{ opacity: 1, x: 0 }}
|
||||
transition={{ delay: 0.8 }}
|
||||
initial={{ opacity: 0, y: 20 }}
|
||||
animate={{ opacity: 1, y: 0 }}
|
||||
transition={{ delay: 0.8, type: "spring", stiffness: 300, damping: 25 }}
|
||||
>
|
||||
<Card className="h-full border-2 hover:border-primary/50 transition-all hover:shadow-lg cursor-pointer group">
|
||||
<CardHeader>
|
||||
<div className="w-12 h-12 bg-purple-100 dark:bg-purple-950 rounded-lg flex items-center justify-center mb-3 group-hover:scale-110 transition-transform">
|
||||
<Card className="h-full border-2 hover:border-purple-500/50 transition-all duration-300 hover:shadow-xl hover:shadow-purple-500/10 cursor-pointer group relative overflow-hidden">
|
||||
<div className="absolute top-0 right-0 w-32 h-32 bg-gradient-to-br from-purple-500/10 to-transparent rounded-full blur-2xl -mr-16 -mt-16 group-hover:scale-150 transition-transform duration-500" />
|
||||
<CardHeader className="relative">
|
||||
<div className="w-12 h-12 bg-gradient-to-br from-purple-500/20 to-purple-600/10 rounded-xl flex items-center justify-center mb-3 group-hover:scale-110 group-hover:rotate-3 transition-all duration-300 ring-1 ring-purple-500/20">
|
||||
<MessageSquare className="w-6 h-6 text-purple-600 dark:text-purple-400" />
|
||||
</div>
|
||||
<CardTitle className="text-lg">Start Chatting</CardTitle>
|
||||
|
|
@ -109,27 +156,27 @@ export function CompletionStep({ searchSpaceId }: CompletionStepProps) {
|
|||
Jump right into the AI researcher and start asking questions
|
||||
</CardDescription>
|
||||
</CardHeader>
|
||||
<CardContent className="space-y-4">
|
||||
<CardContent className="space-y-4 relative">
|
||||
<div className="space-y-2 text-sm text-muted-foreground">
|
||||
<div className="flex items-center gap-2">
|
||||
<CheckCircle className="w-4 h-4 text-green-600" />
|
||||
<CheckCircle className="w-4 h-4 text-emerald-500" />
|
||||
<span>AI-powered conversations</span>
|
||||
</div>
|
||||
<div className="flex items-center gap-2">
|
||||
<CheckCircle className="w-4 h-4 text-green-600" />
|
||||
<CheckCircle className="w-4 h-4 text-emerald-500" />
|
||||
<span>Research and explore topics</span>
|
||||
</div>
|
||||
<div className="flex items-center gap-2">
|
||||
<CheckCircle className="w-4 h-4 text-green-600" />
|
||||
<CheckCircle className="w-4 h-4 text-emerald-500" />
|
||||
<span>Get instant insights</span>
|
||||
</div>
|
||||
</div>
|
||||
<Button
|
||||
className="w-full group-hover:bg-primary/90"
|
||||
className="w-full bg-purple-600 hover:bg-purple-700 text-white group-hover:shadow-lg group-hover:shadow-purple-500/25 transition-all duration-300"
|
||||
onClick={() => router.push(`/dashboard/${searchSpaceId}/researcher`)}
|
||||
>
|
||||
Start Chatting
|
||||
<ArrowRight className="w-4 h-4 ml-2" />
|
||||
<ArrowRight className="w-4 h-4 ml-2 group-hover:translate-x-1 transition-transform" />
|
||||
</Button>
|
||||
</CardContent>
|
||||
</Card>
|
||||
|
|
|
|||
|
|
@ -16,7 +16,8 @@ const demoPlans = [
|
|||
"Podcasts support with local TTS providers.",
|
||||
"Connects with 15+ external sources.",
|
||||
"Cross-Browser Extension for dynamic webpages including authenticated content",
|
||||
"Upcoming: Mergeable MindMaps",
|
||||
"Role-based access permissions",
|
||||
"Collaboration and multiplayer features",
|
||||
"Upcoming: Note Management",
|
||||
],
|
||||
description: "Open source version with powerful features",
|
||||
|
|
@ -32,9 +33,10 @@ const demoPlans = [
|
|||
features: [
|
||||
"Everything in Community",
|
||||
"Priority Support",
|
||||
"Role-based access permissions",
|
||||
"Collaboration and multiplayer features",
|
||||
"Advanced security features",
|
||||
"Audit logs and compliance",
|
||||
"SSO, OIDC & SAML",
|
||||
"SLA guarantee",
|
||||
],
|
||||
description: "For large organizations with specific needs",
|
||||
buttonText: "Contact Sales",
|
||||
|
|
|
|||
|
|
@ -413,19 +413,6 @@ export function LLMRoleManager({ searchSpaceId }: LLMRoleManagerProps) {
|
|||
</div>
|
||||
</CardHeader>
|
||||
<CardContent className="space-y-4">
|
||||
<div className="space-y-2">
|
||||
<div className="text-sm text-muted-foreground">
|
||||
<strong>Use cases:</strong> {role.examples}
|
||||
</div>
|
||||
<div className="flex flex-wrap gap-1">
|
||||
{role.characteristics.map((char, idx) => (
|
||||
<Badge key={idx} variant="outline" className="text-xs">
|
||||
{char}
|
||||
</Badge>
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div className="space-y-2">
|
||||
<Label className="text-sm font-medium">Assign LLM Configuration:</Label>
|
||||
<Select
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@ import {
|
|||
SquareTerminal,
|
||||
Trash2,
|
||||
Undo2,
|
||||
Users,
|
||||
} from "lucide-react";
|
||||
import Image from "next/image";
|
||||
import Link from "next/link";
|
||||
|
|
@ -54,6 +55,7 @@ export const iconMap: Record<string, LucideIcon> = {
|
|||
Trash2,
|
||||
Podcast,
|
||||
FileText,
|
||||
Users,
|
||||
};
|
||||
|
||||
const defaultData = {
|
||||
|
|
|
|||
|
|
@ -43,6 +43,7 @@ export function NavMain({ items }: { items: NavItem[] }) {
|
|||
Podcasts: "podcasts",
|
||||
Logs: "logs",
|
||||
Platform: "platform",
|
||||
Team: "team",
|
||||
};
|
||||
|
||||
const key = titleMap[title];
|
||||
|
|
|
|||
|
|
@ -19,11 +19,14 @@ interface ConnectorsTabProps {
|
|||
export function ConnectorsTab({ searchSpaceId }: ConnectorsTabProps) {
|
||||
const t = useTranslations("add_connector");
|
||||
const [expandedCategories, setExpandedCategories] = useState<string[]>([
|
||||
"search-engines",
|
||||
"knowledge-bases",
|
||||
"web-search",
|
||||
"messaging",
|
||||
"project-management",
|
||||
"team-chats",
|
||||
"communication",
|
||||
"documentation",
|
||||
"development",
|
||||
"databases",
|
||||
"productivity",
|
||||
"web-crawling",
|
||||
]);
|
||||
|
||||
const toggleCategory = (categoryId: string) => {
|
||||
|
|
|
|||
|
|
@ -5,8 +5,21 @@ import type { ConnectorCategory } from "./types";
|
|||
|
||||
export const connectorCategories: ConnectorCategory[] = [
|
||||
{
|
||||
id: "search-engines",
|
||||
title: "search_engines",
|
||||
id: "web-crawling",
|
||||
title: "web_crawling",
|
||||
connectors: [
|
||||
{
|
||||
id: "webcrawler-connector",
|
||||
title: "Web Pages",
|
||||
description: "webcrawler_desc",
|
||||
icon: getConnectorIcon(EnumConnectorName.WEBCRAWLER_CONNECTOR, "h-6 w-6"),
|
||||
status: "available",
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
id: "web-search",
|
||||
title: "web_search",
|
||||
connectors: [
|
||||
{
|
||||
id: "tavily-api",
|
||||
|
|
@ -29,13 +42,6 @@ export const connectorCategories: ConnectorCategory[] = [
|
|||
icon: getConnectorIcon(EnumConnectorName.LINKUP_API, "h-6 w-6"),
|
||||
status: "available",
|
||||
},
|
||||
{
|
||||
id: "elasticsearch-connector",
|
||||
title: "Elasticsearch",
|
||||
description: "elasticsearch_desc",
|
||||
icon: getConnectorIcon(EnumConnectorName.ELASTICSEARCH_CONNECTOR, "h-6 w-6"),
|
||||
status: "available",
|
||||
},
|
||||
{
|
||||
id: "baidu-search-api",
|
||||
title: "Baidu Search",
|
||||
|
|
@ -46,8 +52,8 @@ export const connectorCategories: ConnectorCategory[] = [
|
|||
],
|
||||
},
|
||||
{
|
||||
id: "team-chats",
|
||||
title: "team_chats",
|
||||
id: "messaging",
|
||||
title: "messaging",
|
||||
connectors: [
|
||||
{
|
||||
id: "slack-connector",
|
||||
|
|
@ -56,13 +62,6 @@ export const connectorCategories: ConnectorCategory[] = [
|
|||
icon: getConnectorIcon(EnumConnectorName.SLACK_CONNECTOR, "h-6 w-6"),
|
||||
status: "available",
|
||||
},
|
||||
{
|
||||
id: "ms-teams",
|
||||
title: "Microsoft Teams",
|
||||
description: "teams_desc",
|
||||
icon: <IconBrandWindows className="h-6 w-6" />,
|
||||
status: "coming-soon",
|
||||
},
|
||||
{
|
||||
id: "discord-connector",
|
||||
title: "Discord",
|
||||
|
|
@ -70,6 +69,13 @@ export const connectorCategories: ConnectorCategory[] = [
|
|||
icon: getConnectorIcon(EnumConnectorName.DISCORD_CONNECTOR, "h-6 w-6"),
|
||||
status: "available",
|
||||
},
|
||||
{
|
||||
id: "ms-teams",
|
||||
title: "Microsoft Teams",
|
||||
description: "teams_desc",
|
||||
icon: <IconBrandWindows className="h-6 w-6" />,
|
||||
status: "coming-soon",
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
|
|
@ -100,8 +106,8 @@ export const connectorCategories: ConnectorCategory[] = [
|
|||
],
|
||||
},
|
||||
{
|
||||
id: "knowledge-bases",
|
||||
title: "knowledge_bases",
|
||||
id: "documentation",
|
||||
title: "documentation",
|
||||
connectors: [
|
||||
{
|
||||
id: "notion-connector",
|
||||
|
|
@ -110,6 +116,19 @@ export const connectorCategories: ConnectorCategory[] = [
|
|||
icon: getConnectorIcon(EnumConnectorName.NOTION_CONNECTOR, "h-6 w-6"),
|
||||
status: "available",
|
||||
},
|
||||
{
|
||||
id: "confluence-connector",
|
||||
title: "Confluence",
|
||||
description: "confluence_desc",
|
||||
icon: getConnectorIcon(EnumConnectorName.CONFLUENCE_CONNECTOR, "h-6 w-6"),
|
||||
status: "available",
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
id: "development",
|
||||
title: "development",
|
||||
connectors: [
|
||||
{
|
||||
id: "github-connector",
|
||||
title: "GitHub",
|
||||
|
|
@ -117,11 +136,17 @@ export const connectorCategories: ConnectorCategory[] = [
|
|||
icon: getConnectorIcon(EnumConnectorName.GITHUB_CONNECTOR, "h-6 w-6"),
|
||||
status: "available",
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
id: "databases",
|
||||
title: "databases",
|
||||
connectors: [
|
||||
{
|
||||
id: "confluence-connector",
|
||||
title: "Confluence",
|
||||
description: "confluence_desc",
|
||||
icon: getConnectorIcon(EnumConnectorName.CONFLUENCE_CONNECTOR, "h-6 w-6"),
|
||||
id: "elasticsearch-connector",
|
||||
title: "Elasticsearch",
|
||||
description: "elasticsearch_desc",
|
||||
icon: getConnectorIcon(EnumConnectorName.ELASTICSEARCH_CONNECTOR, "h-6 w-6"),
|
||||
status: "available",
|
||||
},
|
||||
{
|
||||
|
|
@ -131,18 +156,11 @@ export const connectorCategories: ConnectorCategory[] = [
|
|||
icon: getConnectorIcon(EnumConnectorName.AIRTABLE_CONNECTOR, "h-6 w-6"),
|
||||
status: "available",
|
||||
},
|
||||
{
|
||||
id: "luma-connector",
|
||||
title: "Luma",
|
||||
description: "luma_desc",
|
||||
icon: getConnectorIcon(EnumConnectorName.LUMA_CONNECTOR, "h-6 w-6"),
|
||||
status: "available",
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
id: "communication",
|
||||
title: "communication",
|
||||
id: "productivity",
|
||||
title: "productivity",
|
||||
connectors: [
|
||||
{
|
||||
id: "google-calendar-connector",
|
||||
|
|
@ -158,6 +176,13 @@ export const connectorCategories: ConnectorCategory[] = [
|
|||
icon: getConnectorIcon(EnumConnectorName.GOOGLE_GMAIL_CONNECTOR, "h-6 w-6"),
|
||||
status: "available",
|
||||
},
|
||||
{
|
||||
id: "luma-connector",
|
||||
title: "Luma",
|
||||
description: "luma_desc",
|
||||
icon: getConnectorIcon(EnumConnectorName.LUMA_CONNECTOR, "h-6 w-6"),
|
||||
status: "available",
|
||||
},
|
||||
{
|
||||
id: "zoom",
|
||||
title: "Zoom",
|
||||
|
|
|
|||
|
|
@ -97,7 +97,7 @@ Before you begin, ensure you have:
|
|||
| STT_SERVICE | Speech-to-Text API provider for Audio Files (e.g., `local/base`, `openai/whisper-1`). See [supported providers](https://docs.litellm.ai/docs/audio_transcription#supported-providers) |
|
||||
| STT_SERVICE_API_KEY | (Optional if local) API key for the Speech-to-Text service |
|
||||
| STT_SERVICE_API_BASE | (Optional) Custom API base URL for the Speech-to-Text service |
|
||||
| FIRECRAWL_API_KEY | API key for Firecrawl service for web crawling |
|
||||
| FIRECRAWL_API_KEY | API key for Firecrawl service for web crawling |
|
||||
| ETL_SERVICE | Document parsing service: `UNSTRUCTURED` (supports 34+ formats), `LLAMACLOUD` (supports 50+ formats including legacy document types), or `DOCLING` (local processing, supports PDF, Office docs, images, HTML, CSV) |
|
||||
| UNSTRUCTURED_API_KEY | API key for Unstructured.io service for document parsing (required if ETL_SERVICE=UNSTRUCTURED) |
|
||||
| LLAMA_CLOUD_API_KEY | API key for LlamaCloud service for document parsing (required if ETL_SERVICE=LLAMACLOUD) |
|
||||
|
|
|
|||
|
|
@ -17,4 +17,5 @@ export enum EnumConnectorName {
|
|||
AIRTABLE_CONNECTOR = "AIRTABLE_CONNECTOR",
|
||||
LUMA_CONNECTOR = "LUMA_CONNECTOR",
|
||||
ELASTICSEARCH_CONNECTOR = "ELASTICSEARCH_CONNECTOR",
|
||||
WEBCRAWLER_CONNECTOR = "WEBCRAWLER_CONNECTOR",
|
||||
}
|
||||
|
|
|
|||
|
|
@ -59,11 +59,13 @@ export const getConnectorIcon = (connectorType: EnumConnectorName | string, clas
|
|||
return <IconSparkles {...iconProps} />;
|
||||
case EnumConnectorName.ELASTICSEARCH_CONNECTOR:
|
||||
return <IconBrandElastic {...iconProps} />;
|
||||
case EnumConnectorName.WEBCRAWLER_CONNECTOR:
|
||||
return <Globe {...iconProps} />;
|
||||
// Additional cases for non-enum connector types
|
||||
case "YOUTUBE_VIDEO":
|
||||
return <IconBrandYoutube {...iconProps} />;
|
||||
case "CRAWLED_URL":
|
||||
return <Globe {...iconProps} />;
|
||||
case "YOUTUBE_VIDEO":
|
||||
return <IconBrandYoutube {...iconProps} />;
|
||||
case "FILE":
|
||||
return <File {...iconProps} />;
|
||||
case "EXTENSION":
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
export * from "./use-document-by-chunk";
|
||||
export * from "./use-logs";
|
||||
export * from "./use-rbac";
|
||||
export * from "./use-search-source-connectors";
|
||||
export * from "./use-search-space";
|
||||
export * from "./use-user";
|
||||
|
|
|
|||
|
|
@ -97,6 +97,8 @@ export function useConnectorEditPage(connectorId: number, searchSpaceId: string)
|
|||
JIRA_API_TOKEN: "",
|
||||
LUMA_API_KEY: "",
|
||||
ELASTICSEARCH_API_KEY: "",
|
||||
FIRECRAWL_API_KEY: "",
|
||||
INITIAL_URLS: "",
|
||||
},
|
||||
});
|
||||
|
||||
|
|
@ -142,6 +144,8 @@ export function useConnectorEditPage(connectorId: number, searchSpaceId: string)
|
|||
JIRA_API_TOKEN: config.JIRA_API_TOKEN || "",
|
||||
LUMA_API_KEY: config.LUMA_API_KEY || "",
|
||||
ELASTICSEARCH_API_KEY: config.ELASTICSEARCH_API_KEY || "",
|
||||
FIRECRAWL_API_KEY: config.FIRECRAWL_API_KEY || "",
|
||||
INITIAL_URLS: config.INITIAL_URLS || "",
|
||||
});
|
||||
if (currentConnector.connector_type === "GITHUB_CONNECTOR") {
|
||||
const savedRepos = config.repo_full_names || [];
|
||||
|
|
@ -469,6 +473,35 @@ export function useConnectorEditPage(connectorId: number, searchSpaceId: string)
|
|||
newConfig = { ELASTICSEARCH_API_KEY: formData.ELASTICSEARCH_API_KEY };
|
||||
}
|
||||
break;
|
||||
case "WEBCRAWLER_CONNECTOR":
|
||||
if (
|
||||
formData.FIRECRAWL_API_KEY !== originalConfig.FIRECRAWL_API_KEY ||
|
||||
formData.INITIAL_URLS !== originalConfig.INITIAL_URLS
|
||||
) {
|
||||
newConfig = {};
|
||||
|
||||
if (formData.FIRECRAWL_API_KEY && formData.FIRECRAWL_API_KEY.trim()) {
|
||||
if (!formData.FIRECRAWL_API_KEY.startsWith("fc-")) {
|
||||
toast.warning(
|
||||
"Firecrawl API keys typically start with 'fc-'. Please verify your key."
|
||||
);
|
||||
}
|
||||
newConfig.FIRECRAWL_API_KEY = formData.FIRECRAWL_API_KEY.trim();
|
||||
} else if (originalConfig.FIRECRAWL_API_KEY) {
|
||||
toast.info(
|
||||
"Firecrawl API key removed. Web crawler will use AsyncChromiumLoader as fallback."
|
||||
);
|
||||
}
|
||||
|
||||
if (formData.INITIAL_URLS !== undefined) {
|
||||
if (formData.INITIAL_URLS && formData.INITIAL_URLS.trim()) {
|
||||
newConfig.INITIAL_URLS = formData.INITIAL_URLS.trim();
|
||||
} else if (originalConfig.INITIAL_URLS) {
|
||||
toast.info("URLs removed from crawler configuration.");
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
if (newConfig !== null) {
|
||||
|
|
@ -562,6 +595,9 @@ export function useConnectorEditPage(connectorId: number, searchSpaceId: string)
|
|||
"ELASTICSEARCH_API_KEY",
|
||||
newlySavedConfig.ELASTICSEARCH_API_KEY || ""
|
||||
);
|
||||
} else if (connector.connector_type === "WEBCRAWLER_CONNECTOR") {
|
||||
editForm.setValue("FIRECRAWL_API_KEY", newlySavedConfig.FIRECRAWL_API_KEY || "");
|
||||
editForm.setValue("INITIAL_URLS", newlySavedConfig.INITIAL_URLS || "");
|
||||
}
|
||||
}
|
||||
if (connector.connector_type === "GITHUB_CONNECTOR") {
|
||||
|
|
|
|||
773
surfsense_web/hooks/use-rbac.ts
Normal file
773
surfsense_web/hooks/use-rbac.ts
Normal file
|
|
@ -0,0 +1,773 @@
|
|||
"use client";
|
||||
|
||||
import { useCallback, useEffect, useMemo, useState } from "react";
|
||||
import { toast } from "sonner";
|
||||
|
||||
// ============ Types ============
|
||||
|
||||
export interface Role {
|
||||
id: number;
|
||||
name: string;
|
||||
description: string | null;
|
||||
permissions: string[];
|
||||
is_default: boolean;
|
||||
is_system_role: boolean;
|
||||
search_space_id: number;
|
||||
created_at: string;
|
||||
}
|
||||
|
||||
export interface Member {
|
||||
id: number;
|
||||
user_id: string;
|
||||
search_space_id: number;
|
||||
role_id: number | null;
|
||||
is_owner: boolean;
|
||||
joined_at: string;
|
||||
created_at: string;
|
||||
role: Role | null;
|
||||
user_email: string | null;
|
||||
}
|
||||
|
||||
export interface Invite {
|
||||
id: number;
|
||||
invite_code: string;
|
||||
search_space_id: number;
|
||||
role_id: number | null;
|
||||
created_by_id: string | null;
|
||||
expires_at: string | null;
|
||||
max_uses: number | null;
|
||||
uses_count: number;
|
||||
is_active: boolean;
|
||||
name: string | null;
|
||||
created_at: string;
|
||||
role: Role | null;
|
||||
}
|
||||
|
||||
export interface InviteCreate {
|
||||
name?: string;
|
||||
role_id?: number;
|
||||
expires_at?: string;
|
||||
max_uses?: number;
|
||||
}
|
||||
|
||||
export interface InviteUpdate {
|
||||
name?: string;
|
||||
role_id?: number;
|
||||
expires_at?: string;
|
||||
max_uses?: number;
|
||||
is_active?: boolean;
|
||||
}
|
||||
|
||||
export interface RoleCreate {
|
||||
name: string;
|
||||
description?: string;
|
||||
permissions: string[];
|
||||
is_default?: boolean;
|
||||
}
|
||||
|
||||
export interface RoleUpdate {
|
||||
name?: string;
|
||||
description?: string;
|
||||
permissions?: string[];
|
||||
is_default?: boolean;
|
||||
}
|
||||
|
||||
export interface PermissionInfo {
|
||||
value: string;
|
||||
name: string;
|
||||
category: string;
|
||||
}
|
||||
|
||||
export interface UserAccess {
|
||||
search_space_id: number;
|
||||
search_space_name: string;
|
||||
is_owner: boolean;
|
||||
role_name: string | null;
|
||||
permissions: string[];
|
||||
}
|
||||
|
||||
export interface InviteInfo {
|
||||
search_space_name: string;
|
||||
role_name: string | null;
|
||||
is_valid: boolean;
|
||||
message: string | null;
|
||||
}
|
||||
|
||||
// ============ Members Hook ============
|
||||
|
||||
export function useMembers(searchSpaceId: number) {
|
||||
const [members, setMembers] = useState<Member[]>([]);
|
||||
const [loading, setLoading] = useState(true);
|
||||
const [error, setError] = useState<string | null>(null);
|
||||
|
||||
const fetchMembers = useCallback(async () => {
|
||||
if (!searchSpaceId) return;
|
||||
|
||||
try {
|
||||
setLoading(true);
|
||||
const response = await fetch(
|
||||
`${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/searchspaces/${searchSpaceId}/members`,
|
||||
{
|
||||
headers: {
|
||||
Authorization: `Bearer ${localStorage.getItem("surfsense_bearer_token")}`,
|
||||
},
|
||||
method: "GET",
|
||||
}
|
||||
);
|
||||
|
||||
if (response.status === 401) {
|
||||
localStorage.removeItem("surfsense_bearer_token");
|
||||
window.location.href = "/";
|
||||
throw new Error("Unauthorized");
|
||||
}
|
||||
|
||||
if (!response.ok) {
|
||||
const errorData = await response.json().catch(() => ({}));
|
||||
throw new Error(errorData.detail || "Failed to fetch members");
|
||||
}
|
||||
|
||||
const data = await response.json();
|
||||
setMembers(data);
|
||||
setError(null);
|
||||
return data;
|
||||
} catch (err: any) {
|
||||
setError(err.message || "Failed to fetch members");
|
||||
console.error("Error fetching members:", err);
|
||||
} finally {
|
||||
setLoading(false);
|
||||
}
|
||||
}, [searchSpaceId]);
|
||||
|
||||
useEffect(() => {
|
||||
fetchMembers();
|
||||
}, [fetchMembers]);
|
||||
|
||||
const updateMemberRole = useCallback(
|
||||
async (membershipId: number, roleId: number | null) => {
|
||||
try {
|
||||
const response = await fetch(
|
||||
`${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/searchspaces/${searchSpaceId}/members/${membershipId}`,
|
||||
{
|
||||
headers: {
|
||||
"Content-Type": "application/json",
|
||||
Authorization: `Bearer ${localStorage.getItem("surfsense_bearer_token")}`,
|
||||
},
|
||||
method: "PUT",
|
||||
body: JSON.stringify({ role_id: roleId }),
|
||||
}
|
||||
);
|
||||
|
||||
if (!response.ok) {
|
||||
const errorData = await response.json().catch(() => ({}));
|
||||
throw new Error(errorData.detail || "Failed to update member role");
|
||||
}
|
||||
|
||||
const updatedMember = await response.json();
|
||||
setMembers((prev) => prev.map((m) => (m.id === membershipId ? updatedMember : m)));
|
||||
toast.success("Member role updated successfully");
|
||||
return updatedMember;
|
||||
} catch (err: any) {
|
||||
toast.error(err.message || "Failed to update member role");
|
||||
throw err;
|
||||
}
|
||||
},
|
||||
[searchSpaceId]
|
||||
);
|
||||
|
||||
const removeMember = useCallback(
|
||||
async (membershipId: number) => {
|
||||
try {
|
||||
const response = await fetch(
|
||||
`${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/searchspaces/${searchSpaceId}/members/${membershipId}`,
|
||||
{
|
||||
headers: {
|
||||
Authorization: `Bearer ${localStorage.getItem("surfsense_bearer_token")}`,
|
||||
},
|
||||
method: "DELETE",
|
||||
}
|
||||
);
|
||||
|
||||
if (!response.ok) {
|
||||
const errorData = await response.json().catch(() => ({}));
|
||||
throw new Error(errorData.detail || "Failed to remove member");
|
||||
}
|
||||
|
||||
setMembers((prev) => prev.filter((m) => m.id !== membershipId));
|
||||
toast.success("Member removed successfully");
|
||||
return true;
|
||||
} catch (err: any) {
|
||||
toast.error(err.message || "Failed to remove member");
|
||||
return false;
|
||||
}
|
||||
},
|
||||
[searchSpaceId]
|
||||
);
|
||||
|
||||
const leaveSearchSpace = useCallback(async () => {
|
||||
try {
|
||||
const response = await fetch(
|
||||
`${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/searchspaces/${searchSpaceId}/members/me`,
|
||||
{
|
||||
headers: {
|
||||
Authorization: `Bearer ${localStorage.getItem("surfsense_bearer_token")}`,
|
||||
},
|
||||
method: "DELETE",
|
||||
}
|
||||
);
|
||||
|
||||
if (!response.ok) {
|
||||
const errorData = await response.json().catch(() => ({}));
|
||||
throw new Error(errorData.detail || "Failed to leave search space");
|
||||
}
|
||||
|
||||
toast.success("Successfully left the search space");
|
||||
return true;
|
||||
} catch (err: any) {
|
||||
toast.error(err.message || "Failed to leave search space");
|
||||
return false;
|
||||
}
|
||||
}, [searchSpaceId]);
|
||||
|
||||
return {
|
||||
members,
|
||||
loading,
|
||||
error,
|
||||
fetchMembers,
|
||||
updateMemberRole,
|
||||
removeMember,
|
||||
leaveSearchSpace,
|
||||
};
|
||||
}
|
||||
|
||||
// ============ Roles Hook ============
|
||||
|
||||
/**
 * Hook managing the roles of a search space.
 *
 * Loads roles on mount (and when `searchSpaceId` changes) and exposes CRUD
 * helpers. Create/update rethrow on failure; delete resolves to a boolean.
 * A 401 on the initial fetch clears the stored token and redirects to login.
 */
export function useRoles(searchSpaceId: number) {
	const [roles, setRoles] = useState<Role[]>([]);
	const [loading, setLoading] = useState(true);
	const [error, setError] = useState<string | null>(null);

	// Fetch all roles for the search space; returns the list, or undefined on failure.
	const fetchRoles = useCallback(async () => {
		if (!searchSpaceId) return;

		try {
			setLoading(true);
			const response = await fetch(
				`${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/searchspaces/${searchSpaceId}/roles`,
				{
					headers: {
						Authorization: `Bearer ${localStorage.getItem("surfsense_bearer_token")}`,
					},
					method: "GET",
				}
			);

			if (response.status === 401) {
				// Stale token — clear it and force a fresh login.
				localStorage.removeItem("surfsense_bearer_token");
				window.location.href = "/";
				throw new Error("Unauthorized");
			}

			if (!response.ok) {
				const errorData = await response.json().catch(() => ({}));
				throw new Error(errorData.detail || "Failed to fetch roles");
			}

			const data = await response.json();
			setRoles(data);
			setError(null);
			return data;
		} catch (err: any) {
			setError(err.message || "Failed to fetch roles");
			console.error("Error fetching roles:", err);
		} finally {
			setLoading(false);
		}
	}, [searchSpaceId]);

	useEffect(() => {
		fetchRoles();
	}, [fetchRoles]);

	// Create a role and append it to local state; rethrows on failure.
	const createRole = useCallback(
		async (roleData: RoleCreate) => {
			try {
				const response = await fetch(
					`${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/searchspaces/${searchSpaceId}/roles`,
					{
						headers: {
							"Content-Type": "application/json",
							Authorization: `Bearer ${localStorage.getItem("surfsense_bearer_token")}`,
						},
						method: "POST",
						body: JSON.stringify(roleData),
					}
				);

				if (!response.ok) {
					const errorData = await response.json().catch(() => ({}));
					throw new Error(errorData.detail || "Failed to create role");
				}

				const newRole = await response.json();
				setRoles((prev) => [...prev, newRole]);
				toast.success("Role created successfully");
				return newRole;
			} catch (err: any) {
				toast.error(err.message || "Failed to create role");
				throw err;
			}
		},
		[searchSpaceId]
	);

	// Update a role and replace it in local state; rethrows on failure.
	const updateRole = useCallback(
		async (roleId: number, roleData: RoleUpdate) => {
			try {
				const response = await fetch(
					`${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/searchspaces/${searchSpaceId}/roles/${roleId}`,
					{
						headers: {
							"Content-Type": "application/json",
							Authorization: `Bearer ${localStorage.getItem("surfsense_bearer_token")}`,
						},
						method: "PUT",
						body: JSON.stringify(roleData),
					}
				);

				if (!response.ok) {
					const errorData = await response.json().catch(() => ({}));
					throw new Error(errorData.detail || "Failed to update role");
				}

				const updatedRole = await response.json();
				setRoles((prev) => prev.map((r) => (r.id === roleId ? updatedRole : r)));
				toast.success("Role updated successfully");
				return updatedRole;
			} catch (err: any) {
				toast.error(err.message || "Failed to update role");
				throw err;
			}
		},
		[searchSpaceId]
	);

	// Delete a role and drop it from local state; resolves to true/false.
	const deleteRole = useCallback(
		async (roleId: number) => {
			try {
				const response = await fetch(
					`${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/searchspaces/${searchSpaceId}/roles/${roleId}`,
					{
						headers: {
							Authorization: `Bearer ${localStorage.getItem("surfsense_bearer_token")}`,
						},
						method: "DELETE",
					}
				);

				if (!response.ok) {
					const errorData = await response.json().catch(() => ({}));
					throw new Error(errorData.detail || "Failed to delete role");
				}

				setRoles((prev) => prev.filter((r) => r.id !== roleId));
				toast.success("Role deleted successfully");
				return true;
			} catch (err: any) {
				toast.error(err.message || "Failed to delete role");
				return false;
			}
		},
		[searchSpaceId]
	);

	return {
		roles,
		loading,
		error,
		fetchRoles,
		createRole,
		updateRole,
		deleteRole,
	};
}
|
||||
|
||||
// ============ Invites Hook ============
|
||||
|
||||
/**
 * Hook managing the invite links of a search space.
 *
 * Loads invites on mount (and when `searchSpaceId` changes) and exposes CRUD
 * helpers. Create/update rethrow on failure; revoke resolves to a boolean.
 * A 401 on the initial fetch clears the stored token and redirects to login.
 */
export function useInvites(searchSpaceId: number) {
	const [invites, setInvites] = useState<Invite[]>([]);
	const [loading, setLoading] = useState(true);
	const [error, setError] = useState<string | null>(null);

	// Fetch all invites for the search space; returns the list, or undefined on failure.
	const fetchInvites = useCallback(async () => {
		if (!searchSpaceId) return;

		try {
			setLoading(true);
			const response = await fetch(
				`${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/searchspaces/${searchSpaceId}/invites`,
				{
					headers: {
						Authorization: `Bearer ${localStorage.getItem("surfsense_bearer_token")}`,
					},
					method: "GET",
				}
			);

			if (response.status === 401) {
				// Stale token — clear it and force a fresh login.
				localStorage.removeItem("surfsense_bearer_token");
				window.location.href = "/";
				throw new Error("Unauthorized");
			}

			if (!response.ok) {
				const errorData = await response.json().catch(() => ({}));
				throw new Error(errorData.detail || "Failed to fetch invites");
			}

			const data = await response.json();
			setInvites(data);
			setError(null);
			return data;
		} catch (err: any) {
			setError(err.message || "Failed to fetch invites");
			console.error("Error fetching invites:", err);
		} finally {
			setLoading(false);
		}
	}, [searchSpaceId]);

	useEffect(() => {
		fetchInvites();
	}, [fetchInvites]);

	// Create an invite and append it to local state; rethrows on failure.
	const createInvite = useCallback(
		async (inviteData: InviteCreate) => {
			try {
				const response = await fetch(
					`${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/searchspaces/${searchSpaceId}/invites`,
					{
						headers: {
							"Content-Type": "application/json",
							Authorization: `Bearer ${localStorage.getItem("surfsense_bearer_token")}`,
						},
						method: "POST",
						body: JSON.stringify(inviteData),
					}
				);

				if (!response.ok) {
					const errorData = await response.json().catch(() => ({}));
					throw new Error(errorData.detail || "Failed to create invite");
				}

				const newInvite = await response.json();
				setInvites((prev) => [...prev, newInvite]);
				toast.success("Invite created successfully");
				return newInvite;
			} catch (err: any) {
				toast.error(err.message || "Failed to create invite");
				throw err;
			}
		},
		[searchSpaceId]
	);

	// Update an invite and replace it in local state; rethrows on failure.
	const updateInvite = useCallback(
		async (inviteId: number, inviteData: InviteUpdate) => {
			try {
				const response = await fetch(
					`${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/searchspaces/${searchSpaceId}/invites/${inviteId}`,
					{
						headers: {
							"Content-Type": "application/json",
							Authorization: `Bearer ${localStorage.getItem("surfsense_bearer_token")}`,
						},
						method: "PUT",
						body: JSON.stringify(inviteData),
					}
				);

				if (!response.ok) {
					const errorData = await response.json().catch(() => ({}));
					throw new Error(errorData.detail || "Failed to update invite");
				}

				const updatedInvite = await response.json();
				setInvites((prev) => prev.map((i) => (i.id === inviteId ? updatedInvite : i)));
				toast.success("Invite updated successfully");
				return updatedInvite;
			} catch (err: any) {
				toast.error(err.message || "Failed to update invite");
				throw err;
			}
		},
		[searchSpaceId]
	);

	// Revoke (delete) an invite and drop it from local state; resolves to true/false.
	const revokeInvite = useCallback(
		async (inviteId: number) => {
			try {
				const response = await fetch(
					`${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/searchspaces/${searchSpaceId}/invites/${inviteId}`,
					{
						headers: {
							Authorization: `Bearer ${localStorage.getItem("surfsense_bearer_token")}`,
						},
						method: "DELETE",
					}
				);

				if (!response.ok) {
					const errorData = await response.json().catch(() => ({}));
					throw new Error(errorData.detail || "Failed to revoke invite");
				}

				setInvites((prev) => prev.filter((i) => i.id !== inviteId));
				toast.success("Invite revoked successfully");
				return true;
			} catch (err: any) {
				toast.error(err.message || "Failed to revoke invite");
				return false;
			}
		},
		[searchSpaceId]
	);

	return {
		invites,
		loading,
		error,
		fetchInvites,
		createInvite,
		updateInvite,
		revokeInvite,
	};
}
|
||||
|
||||
// ============ Permissions Hook ============
|
||||
|
||||
/**
 * Hook exposing the global permission catalog from the backend.
 *
 * Fetches once on mount (no search-space parameter) and additionally exposes
 * the permissions grouped by their `category` field for UI rendering.
 * NOTE(review): unlike the per-space hooks, this one does not redirect on a
 * 401 — presumably intentional, but worth confirming.
 */
export function usePermissions() {
	const [permissions, setPermissions] = useState<PermissionInfo[]>([]);
	const [loading, setLoading] = useState(true);
	const [error, setError] = useState<string | null>(null);

	// Fetch the permission catalog; returns the list, or undefined on failure.
	const fetchPermissions = useCallback(async () => {
		try {
			setLoading(true);
			const response = await fetch(
				`${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/permissions`,
				{
					headers: {
						Authorization: `Bearer ${localStorage.getItem("surfsense_bearer_token")}`,
					},
					method: "GET",
				}
			);

			if (!response.ok) {
				const errorData = await response.json().catch(() => ({}));
				throw new Error(errorData.detail || "Failed to fetch permissions");
			}

			const data = await response.json();
			// The endpoint wraps the list in a `permissions` field.
			setPermissions(data.permissions);
			setError(null);
			return data.permissions;
		} catch (err: any) {
			setError(err.message || "Failed to fetch permissions");
			console.error("Error fetching permissions:", err);
		} finally {
			setLoading(false);
		}
	}, []);

	useEffect(() => {
		fetchPermissions();
	}, [fetchPermissions]);

	// Group permissions by category
	const groupedPermissions = useMemo(() => {
		const groups: Record<string, PermissionInfo[]> = {};
		for (const perm of permissions) {
			if (!groups[perm.category]) {
				groups[perm.category] = [];
			}
			groups[perm.category].push(perm);
		}
		return groups;
	}, [permissions]);

	return {
		permissions,
		groupedPermissions,
		loading,
		error,
		fetchPermissions,
	};
}
|
||||
|
||||
// ============ User Access Hook ============
|
||||
|
||||
/**
 * Hook exposing the current user's access summary for a search space.
 *
 * Fetches `/my-access` on mount (and when `searchSpaceId` changes) and
 * provides permission-check helpers. The `"*"` permission value is treated
 * as a wildcard granting everything (owner/full access).
 * A 401 clears the stored token and redirects to login.
 */
export function useUserAccess(searchSpaceId: number) {
	const [access, setAccess] = useState<UserAccess | null>(null);
	const [loading, setLoading] = useState(true);
	const [error, setError] = useState<string | null>(null);

	// Fetch the access summary; returns it, or undefined on failure.
	const fetchAccess = useCallback(async () => {
		if (!searchSpaceId) return;

		try {
			setLoading(true);
			const response = await fetch(
				`${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/searchspaces/${searchSpaceId}/my-access`,
				{
					headers: {
						Authorization: `Bearer ${localStorage.getItem("surfsense_bearer_token")}`,
					},
					method: "GET",
				}
			);

			if (response.status === 401) {
				// Stale token — clear it and force a fresh login.
				localStorage.removeItem("surfsense_bearer_token");
				window.location.href = "/";
				throw new Error("Unauthorized");
			}

			if (!response.ok) {
				const errorData = await response.json().catch(() => ({}));
				throw new Error(errorData.detail || "Failed to fetch access info");
			}

			const data = await response.json();
			setAccess(data);
			setError(null);
			return data;
		} catch (err: any) {
			setError(err.message || "Failed to fetch access info");
			console.error("Error fetching access:", err);
		} finally {
			setLoading(false);
		}
	}, [searchSpaceId]);

	useEffect(() => {
		fetchAccess();
	}, [fetchAccess]);

	// Helper function to check if user has a specific permission
	const hasPermission = useCallback(
		(permission: string) => {
			if (!access) return false;
			// Owner/full access check
			if (access.permissions.includes("*")) return true;
			return access.permissions.includes(permission);
		},
		[access]
	);

	// Helper function to check if user has any of the given permissions
	const hasAnyPermission = useCallback(
		(permissions: string[]) => {
			if (!access) return false;
			if (access.permissions.includes("*")) return true;
			return permissions.some((p) => access.permissions.includes(p));
		},
		[access]
	);

	return {
		access,
		loading,
		error,
		fetchAccess,
		hasPermission,
		hasAnyPermission,
	};
}
|
||||
|
||||
// ============ Invite Info Hook (Public) ============
|
||||
|
||||
/**
 * Hook for the public invite-acceptance flow.
 *
 * Fetches invite details for `inviteCode` without authentication (the `/info`
 * endpoint is public) and exposes `acceptInvite`, which requires a bearer
 * token and joins the current user to the search space. When `inviteCode` is
 * null, no fetch is made and `loading` resolves to false.
 */
export function useInviteInfo(inviteCode: string | null) {
	const [inviteInfo, setInviteInfo] = useState<InviteInfo | null>(null);
	const [loading, setLoading] = useState(true);
	const [error, setError] = useState<string | null>(null);

	// Fetch public invite details; returns them, or undefined on failure.
	const fetchInviteInfo = useCallback(async () => {
		if (!inviteCode) {
			setLoading(false);
			return;
		}

		try {
			setLoading(true);
			const response = await fetch(
				`${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/invites/${inviteCode}/info`,
				{
					method: "GET",
				}
			);

			if (!response.ok) {
				const errorData = await response.json().catch(() => ({}));
				throw new Error(errorData.detail || "Failed to fetch invite info");
			}

			const data = await response.json();
			setInviteInfo(data);
			setError(null);
			return data;
		} catch (err: any) {
			setError(err.message || "Failed to fetch invite info");
			console.error("Error fetching invite info:", err);
		} finally {
			setLoading(false);
		}
	}, [inviteCode]);

	useEffect(() => {
		fetchInviteInfo();
	}, [fetchInviteInfo]);

	// Accept the invite as the logged-in user; resolves to the backend payload,
	// null when there is no code, and rethrows on request failure.
	const acceptInvite = useCallback(async () => {
		if (!inviteCode) {
			toast.error("No invite code provided");
			return null;
		}

		try {
			const response = await fetch(
				`${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/invites/accept`,
				{
					headers: {
						"Content-Type": "application/json",
						Authorization: `Bearer ${localStorage.getItem("surfsense_bearer_token")}`,
					},
					method: "POST",
					body: JSON.stringify({ invite_code: inviteCode }),
				}
			);

			if (!response.ok) {
				const errorData = await response.json().catch(() => ({}));
				throw new Error(errorData.detail || "Failed to accept invite");
			}

			const data = await response.json();
			toast.success(data.message || "Successfully joined the search space");
			return data;
		} catch (err: any) {
			toast.error(err.message || "Failed to accept invite");
			throw err;
		}
	}, [inviteCode]);

	return {
		inviteInfo,
		loading,
		error,
		fetchInviteInfo,
		acceptInvite,
	};
}
|
||||
|
|
@ -10,6 +10,8 @@ interface SearchSpace {
|
|||
created_at: string;
|
||||
citations_enabled: boolean;
|
||||
qna_custom_instructions: string | null;
|
||||
member_count: number;
|
||||
is_owner: boolean;
|
||||
}
|
||||
|
||||
export function useSearchSpaces() {
|
||||
|
|
|
|||
|
|
@ -18,6 +18,7 @@ export const getConnectorTypeDisplay = (type: string): string => {
|
|||
AIRTABLE_CONNECTOR: "Airtable",
|
||||
LUMA_CONNECTOR: "Luma",
|
||||
ELASTICSEARCH_CONNECTOR: "Elasticsearch",
|
||||
WEBCRAWLER_CONNECTOR: "Web Pages",
|
||||
};
|
||||
return typeMap[type] || type;
|
||||
};
|
||||
|
|
|
|||
|
|
@ -103,6 +103,7 @@
|
|||
"surfsense_dashboard": "SurfSense Dashboard",
|
||||
"welcome_message": "Welcome to your SurfSense dashboard.",
|
||||
"your_search_spaces": "Your Search Spaces",
|
||||
"shared": "Shared",
|
||||
"create_search_space": "Create Search Space",
|
||||
"add_new_search_space": "Add New Search Space",
|
||||
"loading": "Loading",
|
||||
|
|
@ -149,7 +150,8 @@
|
|||
"podcasts": "Podcasts",
|
||||
"logs": "Logs",
|
||||
"all_search_spaces": "All Search Spaces",
|
||||
"chat": "Chat"
|
||||
"chat": "Chat",
|
||||
"team": "Team"
|
||||
},
|
||||
"pricing": {
|
||||
"title": "SurfSense Pricing",
|
||||
|
|
@ -304,11 +306,14 @@
|
|||
"add_connector": {
|
||||
"title": "Connect Your Tools",
|
||||
"subtitle": "Integrate with your favorite services to enhance your research capabilities.",
|
||||
"search_engines": "Search Engines",
|
||||
"team_chats": "Team Chats",
|
||||
"web_search": "Web Search",
|
||||
"messaging": "Messaging",
|
||||
"project_management": "Project Management",
|
||||
"knowledge_bases": "Knowledge Bases",
|
||||
"communication": "Communication",
|
||||
"documentation": "Documentation",
|
||||
"development": "Development",
|
||||
"databases": "Databases",
|
||||
"productivity": "Productivity",
|
||||
"web_crawling": "Web Crawling",
|
||||
"connect": "Connect",
|
||||
"coming_soon": "Coming Soon",
|
||||
"connected": "Connected",
|
||||
|
|
@ -328,10 +333,11 @@
|
|||
"github_desc": "Connect a GitHub PAT to index code and docs from accessible repositories.",
|
||||
"confluence_desc": "Connect to Confluence to search pages, comments and documentation.",
|
||||
"airtable_desc": "Connect to Airtable to search records, tables and database content.",
|
||||
"luma_desc": "Connect to Luma to search events",
|
||||
"luma_desc": "Connect to Luma to search events, meetups and gatherings.",
|
||||
"calendar_desc": "Connect to Google Calendar to search events, meetings and schedules.",
|
||||
"gmail_desc": "Connect to your Gmail account to search through your emails.",
|
||||
"zoom_desc": "Connect to Zoom to access meeting recordings and transcripts."
|
||||
"zoom_desc": "Connect to Zoom to access meeting recordings and transcripts.",
|
||||
"webcrawler_desc": "Crawl and index content from any public web pages."
|
||||
},
|
||||
"upload_documents": {
|
||||
"title": "Upload Documents",
|
||||
|
|
|
|||
|
|
@ -103,6 +103,7 @@
|
|||
"surfsense_dashboard": "SurfSense 仪表盘",
|
||||
"welcome_message": "欢迎来到您的 SurfSense 仪表盘。",
|
||||
"your_search_spaces": "您的搜索空间",
|
||||
"shared": "共享",
|
||||
"create_search_space": "创建搜索空间",
|
||||
"add_new_search_space": "添加新的搜索空间",
|
||||
"loading": "加载中",
|
||||
|
|
@ -149,7 +150,8 @@
|
|||
"podcasts": "播客",
|
||||
"logs": "日志",
|
||||
"all_search_spaces": "所有搜索空间",
|
||||
"chat": "聊天"
|
||||
"chat": "聊天",
|
||||
"team": "团队"
|
||||
},
|
||||
"pricing": {
|
||||
"title": "SurfSense 定价",
|
||||
|
|
@ -304,11 +306,14 @@
|
|||
"add_connector": {
|
||||
"title": "连接您的工具",
|
||||
"subtitle": "集成您喜欢的服务以增强研究能力。",
|
||||
"search_engines": "搜索引擎",
|
||||
"team_chats": "团队聊天",
|
||||
"web_search": "网络搜索",
|
||||
"messaging": "即时通讯",
|
||||
"project_management": "项目管理",
|
||||
"knowledge_bases": "知识库",
|
||||
"communication": "通讯",
|
||||
"documentation": "文档协作",
|
||||
"development": "开发工具",
|
||||
"databases": "数据库",
|
||||
"productivity": "效率工具",
|
||||
"web_crawling": "网页爬取",
|
||||
"connect": "连接",
|
||||
"coming_soon": "即将推出",
|
||||
"connected": "已连接",
|
||||
|
|
@ -328,10 +333,11 @@
|
|||
"github_desc": "连接 GitHub PAT 以索引可访问存储库的代码和文档。",
|
||||
"confluence_desc": "连接到 Confluence 以搜索页面、评论和文档。",
|
||||
"airtable_desc": "连接到 Airtable 以搜索记录、表格和数据库内容。",
|
||||
"luma_desc": "连接到 Luma 以搜索活动",
|
||||
"luma_desc": "连接到 Luma 以搜索活动、聚会和集会。",
|
||||
"calendar_desc": "连接到 Google 日历以搜索活动、会议和日程。",
|
||||
"gmail_desc": "连接到您的 Gmail 账户以搜索您的电子邮件。",
|
||||
"zoom_desc": "连接到 Zoom 以访问会议录制和转录。"
|
||||
"zoom_desc": "连接到 Zoom 以访问会议录制和转录。",
|
||||
"webcrawler_desc": "爬取和索引任何公开网页的内容。"
|
||||
},
|
||||
"upload_documents": {
|
||||
"title": "上传文档",
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue