diff --git a/.vscode/launch.json b/.vscode/launch.json index dd0c17501..dfe20d832 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -32,6 +32,19 @@ "console": "integratedTerminal", "justMyCode": false, "cwd": "${workspaceFolder}/surfsense_backend" + }, + { + "name": "Python Debugger: Chat DeepAgent", + "type": "debugpy", + "request": "launch", + "module": "app.agents.new_chat.chat_deepagent", + "console": "integratedTerminal", + "justMyCode": false, + "cwd": "${workspaceFolder}/surfsense_backend", + "python": "${command:python.interpreterPath}", + "env": { + "PYTHONPATH": "${workspaceFolder}/surfsense_backend" + } } ] } \ No newline at end of file diff --git a/surfsense_backend/app/agents/new_chat/__init__.py b/surfsense_backend/app/agents/new_chat/__init__.py new file mode 100644 index 000000000..45edb2362 --- /dev/null +++ b/surfsense_backend/app/agents/new_chat/__init__.py @@ -0,0 +1,27 @@ +"""Chat agents module.""" + +from app.agents.new_chat.chat_deepagent import ( + SURFSENSE_CITATION_INSTRUCTIONS, + SURFSENSE_SYSTEM_PROMPT, + SurfSenseContextSchema, + build_surfsense_system_prompt, + create_chat_litellm_from_config, + create_search_knowledge_base_tool, + create_surfsense_deep_agent, + format_documents_for_context, + load_llm_config_from_yaml, + search_knowledge_base_async, +) + +__all__ = [ + "SURFSENSE_CITATION_INSTRUCTIONS", + "SURFSENSE_SYSTEM_PROMPT", + "SurfSenseContextSchema", + "build_surfsense_system_prompt", + "create_chat_litellm_from_config", + "create_search_knowledge_base_tool", + "create_surfsense_deep_agent", + "format_documents_for_context", + "load_llm_config_from_yaml", + "search_knowledge_base_async", +] diff --git a/surfsense_backend/app/agents/new_chat/chat_deepagent.py b/surfsense_backend/app/agents/new_chat/chat_deepagent.py new file mode 100644 index 000000000..561fbd3cf --- /dev/null +++ b/surfsense_backend/app/agents/new_chat/chat_deepagent.py @@ -0,0 +1,998 @@ +""" +Test script for create_deep_agent with ChatLiteLLM from global_llm_config.yaml + +This demonstrates: +1. Loading LLM config from global_llm_config.yaml +2. Creating a ChatLiteLLM instance +3. Using context_schema to add custom state fields +4. Creating a search_knowledge_base tool similar to fetch_relevant_documents +""" + +import sys +from pathlib import Path + +# Add parent directory to path so 'app' module can be found when running directly +_THIS_FILE = Path(__file__).resolve() +_BACKEND_ROOT = _THIS_FILE.parent.parent.parent.parent # surfsense_backend/ +if str(_BACKEND_ROOT) not in sys.path: + sys.path.insert(0, str(_BACKEND_ROOT)) + +import asyncio +import json +from datetime import UTC, datetime, timedelta +from typing import Any, TypedDict + +import yaml +from deepagents import create_deep_agent +from langchain_core.messages import HumanMessage +from langchain_core.tools import tool +from langchain_litellm import ChatLiteLLM +from sqlalchemy.ext.asyncio import AsyncSession + +from app.db import async_session_maker +from app.services.connector_service import ConnectorService + +# ============================================================================= +# LLM Configuration Loading +# ============================================================================= + + +def load_llm_config_from_yaml(llm_config_id: int = -1) -> dict | None: + """ + Load a specific LLM config from global_llm_config.yaml. 
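+
+    Example (illustrative; assumes the YAML defines a config with id -1):
+
+        cfg = load_llm_config_from_yaml(llm_config_id=-1)
+        llm = create_chat_litellm_from_config(cfg) if cfg else None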
+ + Args: + llm_config_id: The id of the config to load (default: -1) + + Returns: + LLM config dict or None if not found + """ + # Get the config file path + base_dir = Path(__file__).resolve().parent.parent.parent.parent + config_file = base_dir / "app" / "config" / "global_llm_config.yaml" + + # Fallback to example file if main config doesn't exist + if not config_file.exists(): + config_file = base_dir / "app" / "config" / "global_llm_config.example.yaml" + if not config_file.exists(): + print("Error: No global_llm_config.yaml or example file found") + return None + + try: + with open(config_file, encoding="utf-8") as f: + data = yaml.safe_load(f) + configs = data.get("global_llm_configs", []) + for cfg in configs: + if isinstance(cfg, dict) and cfg.get("id") == llm_config_id: + return cfg + + print(f"Error: Global LLM config id {llm_config_id} not found") + return None + except Exception as e: + print(f"Error loading config: {e}") + return None + + +def create_chat_litellm_from_config(llm_config: dict) -> ChatLiteLLM | None: + """ + Create a ChatLiteLLM instance from a global LLM config. + + Args: + llm_config: LLM configuration dictionary from YAML + + Returns: + ChatLiteLLM instance or None on error + """ + # Provider mapping (same as in llm_service.py) + provider_map = { + "OPENAI": "openai", + "ANTHROPIC": "anthropic", + "GROQ": "groq", + "COHERE": "cohere", + "GOOGLE": "gemini", + "OLLAMA": "ollama", + "MISTRAL": "mistral", + "AZURE_OPENAI": "azure", + "OPENROUTER": "openrouter", + "XAI": "xai", + "BEDROCK": "bedrock", + "VERTEX_AI": "vertex_ai", + "TOGETHER_AI": "together_ai", + "FIREWORKS_AI": "fireworks_ai", + "DEEPSEEK": "openai", + "ALIBABA_QWEN": "openai", + "MOONSHOT": "openai", + "ZHIPU": "openai", + } + + # Build the model string + if llm_config.get("custom_provider"): + model_string = f"{llm_config['custom_provider']}/{llm_config['model_name']}" + else: + provider = llm_config.get("provider", "").upper() + provider_prefix = provider_map.get(provider, provider.lower()) + model_string = f"{provider_prefix}/{llm_config['model_name']}" + + # Create ChatLiteLLM instance + litellm_kwargs = { + "model": model_string, + "api_key": llm_config.get("api_key"), + } + + # Add optional parameters + if llm_config.get("api_base"): + litellm_kwargs["api_base"] = llm_config["api_base"] + + # Add any additional litellm parameters + if llm_config.get("litellm_params"): + litellm_kwargs.update(llm_config["litellm_params"]) + + return ChatLiteLLM(**litellm_kwargs) + + +# ============================================================================= +# Custom Context Schema +# ============================================================================= + + +class SurfSenseContextSchema(TypedDict): + """ + Custom state schema for the SurfSense deep agent. + + This extends the default agent state with custom fields. 
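+    For example, invoking the agent with (illustrative values):
+
+        agent.ainvoke({"messages": [...], "search_space_id": 5})
+
+    populates the custom field alongside the defaults.
+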
+    The default state already includes:
+    - messages: Conversation history
+    - todos: Task list from TodoListMiddleware
+    - files: Virtual filesystem from FilesystemMiddleware
+
+    We're adding fields needed for knowledge base search:
+    - search_space_id: The user's search space ID
+    - db_session: Database session (injected at runtime)
+    - connector_service: Connector service instance (injected at runtime)
+    """
+
+    search_space_id: int
+    # These are runtime-injected and won't be serialized
+    # db_session and connector_service are passed when invoking the agent
+
+
+# =============================================================================
+# Knowledge Base Search Tool
+# =============================================================================
+
+# Canonical connector values used internally by ConnectorService
+_ALL_CONNECTORS: list[str] = [
+    "EXTENSION",
+    "FILE",
+    "SLACK_CONNECTOR",
+    "NOTION_CONNECTOR",
+    "YOUTUBE_VIDEO",
+    "GITHUB_CONNECTOR",
+    "ELASTICSEARCH_CONNECTOR",
+    "LINEAR_CONNECTOR",
+    "JIRA_CONNECTOR",
+    "CONFLUENCE_CONNECTOR",
+    "CLICKUP_CONNECTOR",
+    "GOOGLE_CALENDAR_CONNECTOR",
+    "GOOGLE_GMAIL_CONNECTOR",
+    "DISCORD_CONNECTOR",
+    "AIRTABLE_CONNECTOR",
+    "TAVILY_API",
+    "SEARXNG_API",
+    "LINKUP_API",
+    "BAIDU_SEARCH_API",
+    "LUMA_CONNECTOR",
+    "NOTE",
+    "BOOKSTACK_CONNECTOR",
+    "CRAWLED_URL",
+]
+
+
+def _normalize_connectors(connectors_to_search: list[str] | None) -> list[str]:
+    """
+    Normalize connectors provided by the model.
+
+    - Accepts user-facing enums like WEBCRAWLER_CONNECTOR and maps them to canonical
+      ConnectorService types.
+    - Drops unknown values.
+    - If None/empty, defaults to searching across all known connectors.
+    """
+    if not connectors_to_search:
+        return list(_ALL_CONNECTORS)
+
+    normalized: list[str] = []
+    for raw in connectors_to_search:
+        c = (raw or "").strip().upper()
+        if not c:
+            continue
+        if c == "WEBCRAWLER_CONNECTOR":
+            c = "CRAWLED_URL"
+        normalized.append(c)
+
+    # de-dupe while preserving order + filter unknown
+    seen: set[str] = set()
+    out: list[str] = []
+    for c in normalized:
+        if c in seen:
+            continue
+        if c not in _ALL_CONNECTORS:
+            continue
+        seen.add(c)
+        out.append(c)
+    return out if out else list(_ALL_CONNECTORS)
+
+
+SURFSENSE_CITATION_INSTRUCTIONS = """
+
+CRITICAL CITATION REQUIREMENTS:
+
+1. For EVERY piece of information you include from the documents, add a citation in the format [citation:chunk_id] where chunk_id is the exact id value from the <chunk> tag inside <chunks>.
+2. Make sure ALL factual statements from the documents have proper citations.
+3. If multiple chunks support the same point, include all relevant citations [citation:chunk_id1], [citation:chunk_id2].
+4. You MUST use the exact chunk_id values from the <chunk id="..."> attributes. Do not create your own citation numbers.
+5. Every citation MUST be in the format [citation:chunk_id] where chunk_id is the exact chunk id value.
+6. Never modify or change the chunk_id - always use the original values exactly as provided in the chunk tags.
+7. Do not return citations as clickable links.
+8. Never format citations as markdown links like "([citation:5](https://example.com))". Always use plain square brackets only.
+9. Citations must ONLY appear as [citation:chunk_id] or [citation:chunk_id1], [citation:chunk_id2] format - never with parentheses, hyperlinks, or other formatting.
+10. Never make up chunk IDs. Only use chunk_id values that are explicitly provided in the <chunk> tags.
+11. If you are unsure about a chunk_id, omit the citation rather than guessing or making one up.
+
+The documents you receive are structured like this:
+
+<document>
+<metadata>
+  <document_id>42</document_id>
+  <document_type>GITHUB_CONNECTOR</document_type>
+  <title><![CDATA[Some repo / file / issue title]]></title>
+  <url><![CDATA[https://github.com/...]]></url>
+  <metadata_json><![CDATA[{...}]]></metadata_json>
+</metadata>
+<chunks>
+  <chunk id="123"><![CDATA[First chunk of this document's content...]]></chunk>
+  <chunk id="124"><![CDATA[Second chunk of this document's content...]]></chunk>
+</chunks>
+</document>
+
+IMPORTANT: You MUST cite using the chunk ids (e.g. 123, 124). Do NOT cite document_id.
+
+- Every fact from the documents must have a citation in the format [citation:chunk_id] where chunk_id is the EXACT id value from a <chunk> tag
+- Citations should appear at the end of the sentence containing the information they support
+- Multiple citations should be separated by commas: [citation:chunk_id1], [citation:chunk_id2], [citation:chunk_id3]
+- No need to return a references section. Just citations in the answer.
+- NEVER create your own citation format - use the exact chunk_id values from the documents in the [citation:chunk_id] format
+- NEVER format citations as clickable links or as markdown links like "([citation:5](https://example.com))". Always use plain square brackets only
+- NEVER make up chunk IDs if you are unsure about the chunk_id. It is better to omit the citation than to guess
+
+CORRECT citation formats:
+- [citation:5]
+- [citation:chunk_id1], [citation:chunk_id2], [citation:chunk_id3]
+
+INCORRECT citation formats (DO NOT use):
+- Using parentheses and markdown links: ([citation:5](https://github.com/MODSetter/SurfSense))
+- Using parentheses around brackets: ([citation:5])
+- Using hyperlinked text: [link to source 5](https://example.com)
+- Using footnote style: ... library¹
+- Making up chunk IDs when the chunk_id is unknown
+- Using old IEEE format: [1], [2], [3]
+- Using source types instead of IDs: [citation:GITHUB_CONNECTOR] instead of [citation:5]
+
+Example response:
+
+Based on your GitHub repositories and video content, Python's asyncio library provides tools for writing concurrent code using the async/await syntax [citation:5]. It's particularly useful for I/O-bound and high-level structured network code [citation:5].
+
+The key advantage of asyncio is that it can improve performance by allowing other code to run while waiting for I/O operations to complete [citation:12]. This makes it excellent for scenarios like web scraping, API calls, database operations, or any situation where your program spends time waiting for external resources.
+
+However, from your video learning, it's important to note that asyncio is not suitable for CPU-bound tasks as it runs on a single thread [citation:12]. For computationally intensive work, you'd want to use multiprocessing instead.
+"""
+
+
+def _parse_date_or_datetime(value: str) -> datetime:
+    """
+    Parse either an ISO date (YYYY-MM-DD) or ISO datetime into an aware UTC datetime.
+
+    - If `value` is a date, interpret it as start-of-day in UTC.
+    - If `value` is a datetime without timezone, assume UTC.
+    """
+    raw = (value or "").strip()
+    if not raw:
+        raise ValueError("Empty date string")
+
+    # Date-only
+    if "T" not in raw:
+        d = datetime.fromisoformat(raw).date()
+        return datetime(d.year, d.month, d.day, tzinfo=UTC)
+
+    # Datetime (may be naive)
+    dt = datetime.fromisoformat(raw)
+    if dt.tzinfo is None:
+        return dt.replace(tzinfo=UTC)
+    return dt.astimezone(UTC)
+
+
+def _resolve_date_range(
+    start_date: datetime | None,
+    end_date: datetime | None,
+) -> tuple[datetime, datetime]:
+    """
+    Resolve a date range, defaulting to the last 2 years if not provided.
+    Ensures start_date <= end_date.
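+
+    Example (illustrative):
+
+        >>> start, end = _resolve_date_range(None, None)
+        >>> (end - start).days
+        730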
+ """ + resolved_end = end_date or datetime.now(UTC) + resolved_start = start_date or (resolved_end - timedelta(days=730)) + + if resolved_start > resolved_end: + resolved_start, resolved_end = resolved_end, resolved_start + + return resolved_start, resolved_end + + +def format_documents_for_context(documents: list[dict[str, Any]]) -> str: + """ + Format retrieved documents into a readable context string for the LLM. + + Args: + documents: List of document dictionaries from connector search + + Returns: + Formatted string with document contents and metadata + """ + if not documents: + return "" + + # Group chunks by document id (preferred) to produce the XML structure. + # + # IMPORTANT: ConnectorService returns **document-grouped** results of the form: + # { + # "document": {...}, + # "chunks": [{"chunk_id": 123, "content": "..."}, ...], + # "source": "NOTION_CONNECTOR" | "FILE" | ... + # } + # + # We must preserve chunk_id so citations like [citation:123] are possible. + grouped: dict[str, dict[str, Any]] = {} + + for doc in documents: + document_info = (doc.get("document") or {}) if isinstance(doc, dict) else {} + metadata = ( + (document_info.get("metadata") or {}) + if isinstance(document_info, dict) + else {} + ) + if not metadata and isinstance(doc, dict): + # Some result shapes may place metadata at the top level. + metadata = doc.get("metadata") or {} + + source = ( + (doc.get("source") if isinstance(doc, dict) else None) + or metadata.get("document_type") + or "UNKNOWN" + ) + + # Document identity (prefer document_id; otherwise fall back to type+title+url) + document_id_val = document_info.get("id") + title = ( + document_info.get("title") or metadata.get("title") or "Untitled Document" + ) + url = ( + metadata.get("url") + or metadata.get("source") + or metadata.get("page_url") + or "" + ) + + doc_key = ( + str(document_id_val) + if document_id_val is not None + else f"{source}::{title}::{url}" + ) + + if doc_key not in grouped: + grouped[doc_key] = { + "document_id": document_id_val + if document_id_val is not None + else doc_key, + "document_type": metadata.get("document_type") or source, + "title": title, + "url": url, + "metadata": metadata, + "chunks": [], + } + + # Prefer document-grouped chunks if available + chunks_list = doc.get("chunks") if isinstance(doc, dict) else None + if isinstance(chunks_list, list) and chunks_list: + for ch in chunks_list: + if not isinstance(ch, dict): + continue + chunk_id = ch.get("chunk_id") or ch.get("id") + content = (ch.get("content") or "").strip() + if not content: + continue + grouped[doc_key]["chunks"].append( + {"chunk_id": chunk_id, "content": content} + ) + continue + + # Fallback: treat this as a flat chunk-like object + if not isinstance(doc, dict): + continue + chunk_id = doc.get("chunk_id") or doc.get("id") + content = (doc.get("content") or "").strip() + if not content: + continue + grouped[doc_key]["chunks"].append({"chunk_id": chunk_id, "content": content}) + + # Render XML expected by citation instructions + parts: list[str] = [] + for g in grouped.values(): + metadata_json = json.dumps(g["metadata"], ensure_ascii=False) + + parts.append("") + parts.append("") + parts.append(f" {g['document_id']}") + parts.append(f" {g['document_type']}") + parts.append(f" <![CDATA[{g['title']}]]>") + parts.append(f" ") + parts.append(f" ") + parts.append("") + parts.append("") + parts.append("") + + for ch in g["chunks"]: + ch_content = ch["content"] + ch_id = ch["chunk_id"] + if ch_id is None: + parts.append(f" ") + else: + 
parts.append(f" ") + + parts.append("") + parts.append("") + parts.append("") + + return "\n".join(parts).strip() + + +async def search_knowledge_base_async( + query: str, + search_space_id: int, + db_session: AsyncSession, + connector_service: ConnectorService, + connectors_to_search: list[str] | None = None, + top_k: int = 10, + start_date: datetime | None = None, + end_date: datetime | None = None, +) -> str: + """ + Search the user's knowledge base for relevant documents. + + This is the async implementation that searches across multiple connectors. + + Args: + query: The search query + search_space_id: The user's search space ID + db_session: Database session + connector_service: Initialized connector service + connectors_to_search: Optional list of connector types to search. If omitted, searches all. + top_k: Number of results per connector + start_date: Optional start datetime (UTC) for filtering documents + end_date: Optional end datetime (UTC) for filtering documents + + Returns: + Formatted string with search results + """ + all_documents = [] + + # Resolve date range (default last 2 years) + resolved_start_date, resolved_end_date = _resolve_date_range( + start_date=start_date, + end_date=end_date, + ) + + connectors = _normalize_connectors(connectors_to_search) + + for connector in connectors: + try: + if connector == "YOUTUBE_VIDEO": + _, chunks = await connector_service.search_youtube( + user_query=query, + search_space_id=search_space_id, + top_k=top_k, + start_date=resolved_start_date, + end_date=resolved_end_date, + ) + all_documents.extend(chunks) + + elif connector == "EXTENSION": + _, chunks = await connector_service.search_extension( + user_query=query, + search_space_id=search_space_id, + top_k=top_k, + start_date=resolved_start_date, + end_date=resolved_end_date, + ) + all_documents.extend(chunks) + + elif connector == "CRAWLED_URL": + _, chunks = await connector_service.search_crawled_urls( + user_query=query, + search_space_id=search_space_id, + top_k=top_k, + start_date=resolved_start_date, + end_date=resolved_end_date, + ) + all_documents.extend(chunks) + + elif connector == "FILE": + _, chunks = await connector_service.search_files( + user_query=query, + search_space_id=search_space_id, + top_k=top_k, + start_date=resolved_start_date, + end_date=resolved_end_date, + ) + all_documents.extend(chunks) + + elif connector == "SLACK_CONNECTOR": + _, chunks = await connector_service.search_slack( + user_query=query, + search_space_id=search_space_id, + top_k=top_k, + start_date=resolved_start_date, + end_date=resolved_end_date, + ) + all_documents.extend(chunks) + + elif connector == "NOTION_CONNECTOR": + _, chunks = await connector_service.search_notion( + user_query=query, + search_space_id=search_space_id, + top_k=top_k, + start_date=resolved_start_date, + end_date=resolved_end_date, + ) + all_documents.extend(chunks) + + elif connector == "GITHUB_CONNECTOR": + _, chunks = await connector_service.search_github( + user_query=query, + search_space_id=search_space_id, + top_k=top_k, + start_date=resolved_start_date, + end_date=resolved_end_date, + ) + all_documents.extend(chunks) + + elif connector == "LINEAR_CONNECTOR": + _, chunks = await connector_service.search_linear( + user_query=query, + search_space_id=search_space_id, + top_k=top_k, + start_date=resolved_start_date, + end_date=resolved_end_date, + ) + all_documents.extend(chunks) + + elif connector == "TAVILY_API": + _, chunks = await connector_service.search_tavily( + user_query=query, + 
search_space_id=search_space_id, + top_k=top_k, + ) + all_documents.extend(chunks) + + elif connector == "SEARXNG_API": + _, chunks = await connector_service.search_searxng( + user_query=query, + search_space_id=search_space_id, + top_k=top_k, + ) + all_documents.extend(chunks) + + elif connector == "LINKUP_API": + # Keep behavior aligned with researcher: default "standard" + _, chunks = await connector_service.search_linkup( + user_query=query, + search_space_id=search_space_id, + mode="standard", + ) + all_documents.extend(chunks) + + elif connector == "BAIDU_SEARCH_API": + _, chunks = await connector_service.search_baidu( + user_query=query, + search_space_id=search_space_id, + top_k=top_k, + ) + all_documents.extend(chunks) + + elif connector == "DISCORD_CONNECTOR": + _, chunks = await connector_service.search_discord( + user_query=query, + search_space_id=search_space_id, + top_k=top_k, + start_date=resolved_start_date, + end_date=resolved_end_date, + ) + all_documents.extend(chunks) + + elif connector == "JIRA_CONNECTOR": + _, chunks = await connector_service.search_jira( + user_query=query, + search_space_id=search_space_id, + top_k=top_k, + start_date=resolved_start_date, + end_date=resolved_end_date, + ) + all_documents.extend(chunks) + + elif connector == "GOOGLE_CALENDAR_CONNECTOR": + _, chunks = await connector_service.search_google_calendar( + user_query=query, + search_space_id=search_space_id, + top_k=top_k, + start_date=resolved_start_date, + end_date=resolved_end_date, + ) + all_documents.extend(chunks) + + elif connector == "AIRTABLE_CONNECTOR": + _, chunks = await connector_service.search_airtable( + user_query=query, + search_space_id=search_space_id, + top_k=top_k, + start_date=resolved_start_date, + end_date=resolved_end_date, + ) + all_documents.extend(chunks) + + elif connector == "GOOGLE_GMAIL_CONNECTOR": + _, chunks = await connector_service.search_google_gmail( + user_query=query, + search_space_id=search_space_id, + top_k=top_k, + start_date=resolved_start_date, + end_date=resolved_end_date, + ) + all_documents.extend(chunks) + + elif connector == "CONFLUENCE_CONNECTOR": + _, chunks = await connector_service.search_confluence( + user_query=query, + search_space_id=search_space_id, + top_k=top_k, + start_date=resolved_start_date, + end_date=resolved_end_date, + ) + all_documents.extend(chunks) + + elif connector == "CLICKUP_CONNECTOR": + _, chunks = await connector_service.search_clickup( + user_query=query, + search_space_id=search_space_id, + top_k=top_k, + start_date=resolved_start_date, + end_date=resolved_end_date, + ) + all_documents.extend(chunks) + + elif connector == "LUMA_CONNECTOR": + _, chunks = await connector_service.search_luma( + user_query=query, + search_space_id=search_space_id, + top_k=top_k, + start_date=resolved_start_date, + end_date=resolved_end_date, + ) + all_documents.extend(chunks) + + elif connector == "ELASTICSEARCH_CONNECTOR": + _, chunks = await connector_service.search_elasticsearch( + user_query=query, + search_space_id=search_space_id, + top_k=top_k, + start_date=resolved_start_date, + end_date=resolved_end_date, + ) + all_documents.extend(chunks) + + elif connector == "NOTE": + _, chunks = await connector_service.search_notes( + user_query=query, + search_space_id=search_space_id, + top_k=top_k, + start_date=resolved_start_date, + end_date=resolved_end_date, + ) + all_documents.extend(chunks) + + elif connector == "BOOKSTACK_CONNECTOR": + _, chunks = await connector_service.search_bookstack( + user_query=query, + 
search_space_id=search_space_id,
+                    top_k=top_k,
+                    start_date=resolved_start_date,
+                    end_date=resolved_end_date,
+                )
+                all_documents.extend(chunks)
+
+        except Exception as e:
+            print(f"Error searching connector {connector}: {e}")
+            continue
+
+    # Deduplicate by document id, then by content hash.
+    # NOTE: document-grouped results usually carry no top-level "content", so the
+    # hash check is applied only when there is non-empty content to compare;
+    # otherwise every result after the first would collide on the hash of "".
+    seen_doc_ids: set[Any] = set()
+    seen_hashes: set[int] = set()
+    deduplicated: list[dict[str, Any]] = []
+    for doc in all_documents:
+        doc_id = (doc.get("document", {}) or {}).get("id")
+        content = (doc.get("content", "") or "").strip()
+
+        if doc_id and doc_id in seen_doc_ids:
+            continue
+        if content and hash(content) in seen_hashes:
+            continue
+
+        if doc_id:
+            seen_doc_ids.add(doc_id)
+        if content:
+            seen_hashes.add(hash(content))
+        deduplicated.append(doc)
+
+    return format_documents_for_context(deduplicated)
+
+
+def create_search_knowledge_base_tool(
+    search_space_id: int,
+    db_session: AsyncSession,
+    connector_service: ConnectorService,
+):
+    """
+    Factory function to create the search_knowledge_base tool with injected dependencies.
+
+    Args:
+        search_space_id: The user's search space ID
+        db_session: Database session
+        connector_service: Initialized connector service
+
+    Returns:
+        A configured tool function
+    """
+
+    @tool
+    async def search_knowledge_base(
+        query: str,
+        top_k: int = 10,
+        start_date: str | None = None,
+        end_date: str | None = None,
+        connectors_to_search: list[str] | None = None,
+    ) -> str:
+        """
+        Search the user's personal knowledge base for relevant information.
+
+        Use this tool to find documents, notes, files, web pages, and other content
+        that may help answer the user's question.
+
+        IMPORTANT:
+        - If the user requests a specific source type (e.g. "my notes", "Slack messages"),
+          pass `connectors_to_search=[...]` using the enums below.
+        - If `connectors_to_search` is omitted/empty, the system will search broadly.
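+        - Example (illustrative): "what did we decide about the React migration on Slack?"
+          maps to `search_knowledge_base(query="React migration decision", connectors_to_search=["SLACK_CONNECTOR"])`.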
+ + ## Available connector enums for `connectors_to_search` + + - EXTENSION: "Web content saved via SurfSense browser extension" (personal browsing history) + - FILE: "User-uploaded documents (PDFs, Word, etc.)" (personal files) + - NOTE: "SurfSense Notes" (notes created inside SurfSense) + - SLACK_CONNECTOR: "Slack conversations and shared content" (personal workspace communications) + - NOTION_CONNECTOR: "Notion workspace pages and databases" (personal knowledge management) + - YOUTUBE_VIDEO: "YouTube video transcripts and metadata" (personally saved videos) + - GITHUB_CONNECTOR: "GitHub repository content and issues" (personal repositories and interactions) + - ELASTICSEARCH_CONNECTOR: "Elasticsearch indexed documents and data" (personal Elasticsearch instances and custom data sources) + - LINEAR_CONNECTOR: "Linear project issues and discussions" (personal project management) + - JIRA_CONNECTOR: "Jira project issues, tickets, and comments" (personal project tracking) + - CONFLUENCE_CONNECTOR: "Confluence pages and comments" (personal project documentation) + - CLICKUP_CONNECTOR: "ClickUp tasks and project data" (personal task management) + - GOOGLE_CALENDAR_CONNECTOR: "Google Calendar events, meetings, and schedules" (personal calendar and time management) + - GOOGLE_GMAIL_CONNECTOR: "Google Gmail emails and conversations" (personal emails and communications) + - DISCORD_CONNECTOR: "Discord server conversations and shared content" (personal community communications) + - AIRTABLE_CONNECTOR: "Airtable records, tables, and database content" (personal data management and organization) + - TAVILY_API: "Tavily search API results" (personalized search results) + - SEARXNG_API: "SearxNG search API results" (personalized search results) + - LINKUP_API: "Linkup search API results" (personalized search results) + - BAIDU_SEARCH_API: "Baidu search API results" (personalized search results) + - LUMA_CONNECTOR: "Luma events" + - WEBCRAWLER_CONNECTOR: "Webpages indexed by SurfSense" (personally selected websites) + - BOOKSTACK_CONNECTOR: "BookStack pages" (personal documentation) + + NOTE: `WEBCRAWLER_CONNECTOR` is mapped internally to the canonical document type `CRAWLED_URL`. + + Args: + query: The search query - be specific and include key terms + top_k: Number of results to retrieve (default: 10) + start_date: Optional ISO date/datetime (e.g. "2025-12-12" or "2025-12-12T00:00:00+00:00") + end_date: Optional ISO date/datetime (e.g. "2025-12-19" or "2025-12-19T23:59:59+00:00") + connectors_to_search: Optional list of connector enums to search. If omitted, searches all. 
+ + Returns: + Formatted string with relevant documents and their content + """ + parsed_start: datetime | None = None + parsed_end: datetime | None = None + + if start_date: + parsed_start = _parse_date_or_datetime(start_date) + if end_date: + parsed_end = _parse_date_or_datetime(end_date) + + return await search_knowledge_base_async( + query=query, + search_space_id=search_space_id, + db_session=db_session, + connector_service=connector_service, + connectors_to_search=connectors_to_search, + top_k=top_k, + start_date=parsed_start, + end_date=parsed_end, + ) + + return search_knowledge_base + + +# ============================================================================= +# System Prompt +# ============================================================================= + + +def build_surfsense_system_prompt(today: datetime | None = None) -> str: + resolved_today = (today or datetime.now(UTC)).astimezone(UTC).date().isoformat() + + return f""" + +You are SurfSense, a reasoning and acting AI agent designed to answer user questions using the user's personal knowledge base. + +Today's date (UTC): {resolved_today} + + + +You have access to the following tools: +- search_knowledge_base: Search the user's personal knowledge base for relevant information. + - Args: + - query: The search query - be specific and include key terms + - top_k: Number of results to retrieve (default: 10) + - start_date: Optional ISO date/datetime (e.g. "2025-12-12" or "2025-12-12T00:00:00+00:00") + - end_date: Optional ISO date/datetime (e.g. "2025-12-19" or "2025-12-19T23:59:59+00:00") + - connectors_to_search: Optional list of connector enums to search. If omitted, searches all. + - Returns: Formatted string with relevant documents and their content + + +- User: "Fetch all my notes and what's in them?" + - Call: `search_knowledge_base(query="*", top_k=50, connectors_to_search=["NOTE"])` + +- User: "What did I discuss on Slack last week about the React migration?" + - Call: `search_knowledge_base(query="React migration", connectors_to_search=["SLACK_CONNECTOR"], start_date="YYYY-MM-DD", end_date="YYYY-MM-DD")` + + +{SURFSENSE_CITATION_INSTRUCTIONS} +""" + + +SURFSENSE_SYSTEM_PROMPT = build_surfsense_system_prompt() + + +# ============================================================================= +# Deep Agent Factory +# ============================================================================= + + +def create_surfsense_deep_agent( + llm: ChatLiteLLM, + search_space_id: int, + db_session: AsyncSession, + connector_service: ConnectorService, +): + """ + Create a SurfSense deep agent with knowledge base search capability. 
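+
+    A minimal usage sketch (illustrative; assumes an `llm` built via
+    create_chat_litellm_from_config and an existing search space with id 5,
+    mirroring run_test() below):
+
+        async with async_session_maker() as session:
+            connector_service = ConnectorService(session, search_space_id=5)
+            agent = create_surfsense_deep_agent(llm, 5, session, connector_service)
+            result = await agent.ainvoke(
+                {"messages": [HumanMessage(content="...")], "search_space_id": 5}
+            )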
+ + Args: + llm: ChatLiteLLM instance + search_space_id: The user's search space ID + db_session: Database session + connector_service: Initialized connector service + connectors_to_search: List of connector types to search (default: common connectors) + + Returns: + CompiledStateGraph: The configured deep agent + """ + # Create the search tool with injected dependencies + search_tool = create_search_knowledge_base_tool( + search_space_id=search_space_id, + db_session=db_session, + connector_service=connector_service, + ) + + # Create the deep agent + agent = create_deep_agent( + model=llm, + tools=[search_tool], + system_prompt=build_surfsense_system_prompt(), + context_schema=SurfSenseContextSchema, + ) + + return agent + + +# ============================================================================= +# Test Runner +# ============================================================================= + + +async def run_test(): + """Run a basic test of the deep agent.""" + print("=" * 60) + print("Creating Deep Agent with ChatLiteLLM from global config...") + print("=" * 60) + + # Create ChatLiteLLM from global config + # Use global LLM config by id (negative ids are reserved for global configs) + llm_config = load_llm_config_from_yaml(llm_config_id=-2) + if not llm_config: + raise ValueError("Failed to load LLM config from YAML") + llm = create_chat_litellm_from_config(llm_config) + if not llm: + raise ValueError("Failed to create ChatLiteLLM instance") + + # Create a real DB session + ConnectorService, then build the full SurfSense agent. + async with async_session_maker() as session: + # Use the known dev search space id + search_space_id = 5 + + connector_service = ConnectorService(session, search_space_id=search_space_id) + + agent = create_surfsense_deep_agent( + llm=llm, + search_space_id=search_space_id, + db_session=session, + connector_service=connector_service, + ) + + print("\nAgent created successfully!") + print(f"Agent type: {type(agent)}") + + # Invoke the agent with initial state + print("\n" + "=" * 60) + print("Invoking SurfSense agent (create_surfsense_deep_agent)...") + print("=" * 60) + + initial_state = { + "messages": [HumanMessage(content=("What are my notes from last 3 days?"))], + "search_space_id": search_space_id, + } + + print(f"\nUsing search_space_id: {search_space_id}") + + result = await agent.ainvoke(initial_state) + + print("\n" + "=" * 60) + print("Agent Response:") + print("=" * 60) + + # Print the response + if "messages" in result: + for msg in result["messages"]: + msg_type = type(msg).__name__ + content = msg.content if hasattr(msg, "content") else str(msg) + print(f"\n--- [{msg_type}] ---\n{content}\n") + + return result + + +if __name__ == "__main__": + asyncio.run(run_test()) diff --git a/surfsense_backend/app/agents/researcher/nodes.py b/surfsense_backend/app/agents/researcher/nodes.py index 35857c004..b16d4f0c1 100644 --- a/surfsense_backend/app/agents/researcher/nodes.py +++ b/surfsense_backend/app/agents/researcher/nodes.py @@ -1163,6 +1163,33 @@ async def fetch_relevant_documents( } ) + elif connector == "BOOKSTACK_CONNECTOR": + ( + source_object, + bookstack_chunks, + ) = await connector_service.search_bookstack( + user_query=reformulated_query, + search_space_id=search_space_id, + top_k=top_k, + start_date=start_date, + end_date=end_date, + ) + + # Add to sources and raw documents + if source_object: + all_sources.append(source_object) + all_raw_documents.extend(bookstack_chunks) + + # Stream found document count + if streaming_service and 
writer: + writer( + { + "yield_value": streaming_service.format_terminal_info_delta( + f"πŸ“š Found {len(bookstack_chunks)} BookStack pages related to your query" + ) + } + ) + elif connector == "NOTE": ( source_object, diff --git a/surfsense_backend/app/agents/researcher/utils.py b/surfsense_backend/app/agents/researcher/utils.py index ea3c99547..9cb0dcbde 100644 --- a/surfsense_backend/app/agents/researcher/utils.py +++ b/surfsense_backend/app/agents/researcher/utils.py @@ -34,6 +34,7 @@ def get_connector_emoji(connector_name: str) -> str: "LUMA_CONNECTOR": "✨", "ELASTICSEARCH_CONNECTOR": "⚑", "WEBCRAWLER_CONNECTOR": "🌐", + "BOOKSTACK_CONNECTOR": "πŸ“š", "NOTE": "πŸ“", } return connector_emojis.get(connector_name, "πŸ”Ž") @@ -60,6 +61,7 @@ def get_connector_friendly_name(connector_name: str) -> str: "LUMA_CONNECTOR": "Luma", "ELASTICSEARCH_CONNECTOR": "Elasticsearch", "WEBCRAWLER_CONNECTOR": "Web Pages", + "BOOKSTACK_CONNECTOR": "BookStack", "NOTE": "Notes", } return connector_friendly_names.get(connector_name, connector_name) diff --git a/surfsense_backend/app/connectors/webcrawler_connector.py b/surfsense_backend/app/connectors/webcrawler_connector.py index edd7f8800..3fc61f0b5 100644 --- a/surfsense_backend/app/connectors/webcrawler_connector.py +++ b/surfsense_backend/app/connectors/webcrawler_connector.py @@ -5,12 +5,17 @@ A module for crawling web pages and extracting content using Firecrawl or AsyncC Provides a unified interface for web scraping. """ +import logging from typing import Any +import trafilatura import validators +from fake_useragent import UserAgent from firecrawl import AsyncFirecrawlApp from langchain_community.document_loaders import AsyncChromiumLoader +logger = logging.getLogger(__name__) + class WebCrawlerConnector: """Class for crawling web pages and extracting content.""" @@ -121,7 +126,8 @@ class WebCrawlerConnector: async def _crawl_with_chromium(self, url: str) -> dict[str, Any]: """ - Crawl URL using AsyncChromiumLoader. + Crawl URL using AsyncChromiumLoader with Trafilatura for content extraction. + Falls back to raw HTML if Trafilatura extraction fails. 
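+
+        The extraction step below boils down to (a sketch of the calls actually
+        made in this method):
+
+            text = trafilatura.extract(raw_html, output_format="markdown")
+            meta = trafilatura.extract_metadata(raw_html)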
Args: url: URL to crawl @@ -132,33 +138,106 @@ class WebCrawlerConnector: Raises: Exception: If crawling fails """ - crawl_loader = AsyncChromiumLoader(urls=[url], headless=True) + # Generate a realistic User-Agent to avoid bot detection + ua = UserAgent() + user_agent = ua.random + + # Pass User-Agent to AsyncChromiumLoader + crawl_loader = AsyncChromiumLoader( + urls=[url], headless=True, user_agent=user_agent + ) documents = await crawl_loader.aload() if not documents: raise ValueError(f"Failed to load content from {url}") doc = documents[0] + raw_html = doc.page_content # Extract basic metadata from the document - metadata = doc.metadata if doc.metadata else {} + base_metadata = doc.metadata if doc.metadata else {} + + # Try to extract main content using Trafilatura + extracted_content = None + trafilatura_metadata = None + + try: + logger.info( + f"Attempting to extract main content from {url} using Trafilatura" + ) + + # Extract main content as markdown + extracted_content = trafilatura.extract( + raw_html, + output_format="markdown", # Get clean markdown + include_comments=False, # Exclude comments + include_tables=True, # Keep tables + include_images=True, # Keep image references + include_links=True, # Keep links + ) + + # Extract metadata using Trafilatura + trafilatura_metadata = trafilatura.extract_metadata(raw_html) + + if extracted_content and len(extracted_content.strip()) > 0: + logger.info( + f"Successfully extracted main content from {url} using Trafilatura " + f"({len(extracted_content)} chars vs {len(raw_html)} chars raw HTML)" + ) + else: + logger.warning( + f"Trafilatura extraction returned empty content for {url}, " + "falling back to raw HTML" + ) + extracted_content = None + + except Exception as e: + logger.warning( + f"Trafilatura extraction failed for {url}: {e}. " + "Falling back to raw HTML" + ) + extracted_content = None + + # Build metadata, preferring Trafilatura metadata when available + metadata = { + "source": url, + "title": ( + trafilatura_metadata.title + if trafilatura_metadata and trafilatura_metadata.title + else base_metadata.get("title", url) + ), + } + + # Add additional metadata from Trafilatura if available + if trafilatura_metadata: + if trafilatura_metadata.description: + metadata["description"] = trafilatura_metadata.description + if trafilatura_metadata.author: + metadata["author"] = trafilatura_metadata.author + if trafilatura_metadata.date: + metadata["date"] = trafilatura_metadata.date + + # Add any remaining base metadata + metadata.update(base_metadata) return { - "content": doc.page_content, - "metadata": { - "source": url, - "title": metadata.get("title", url), - **metadata, - }, + "content": extracted_content if extracted_content else raw_html, + "metadata": metadata, "crawler_type": "chromium", } - def format_to_structured_document(self, crawl_result: dict[str, Any]) -> str: + def format_to_structured_document( + self, crawl_result: dict[str, Any], exclude_metadata: bool = False + ) -> str: """ Format crawl result as a structured document. Args: crawl_result: Result from crawl_url method + exclude_metadata: If True, excludes ALL metadata fields from the document. + This is useful for content hash generation to ensure the hash + only changes when actual content changes, not when metadata + (which often contains dynamic fields like timestamps, IDs, etc.) changes. 
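+
+        Example of the content-hash use case (illustrative; the hashing scheme
+        shown is an assumption, not part of this module):
+
+            doc = connector.format_to_structured_document(result, exclude_metadata=True)
+            stable_hash = hashlib.sha256(doc.encode("utf-8")).hexdigest()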
Returns: Structured document string @@ -166,15 +245,17 @@ class WebCrawlerConnector: metadata = crawl_result["metadata"] content = crawl_result["content"] - document_parts = ["", ""] + document_parts = [""] - # Add all metadata fields - for key, value in metadata.items(): - document_parts.append(f"{key.upper()}: {value}") + # Include metadata section only if not excluded + if not exclude_metadata: + document_parts.append("") + for key, value in metadata.items(): + document_parts.append(f"{key.upper()}: {value}") + document_parts.append("") document_parts.extend( [ - "", "", "FORMAT: markdown", "TEXT_START", diff --git a/surfsense_backend/app/routes/chats_routes.py b/surfsense_backend/app/routes/chats_routes.py index 766c3f299..2813201e7 100644 --- a/surfsense_backend/app/routes/chats_routes.py +++ b/surfsense_backend/app/routes/chats_routes.py @@ -20,8 +20,13 @@ from app.schemas import ( ChatRead, ChatReadWithoutMessages, ChatUpdate, + NewChatRequest, ) -from app.tasks.stream_connector_search_results import stream_connector_search_results +from app.services.new_streaming_service import VercelStreamingService +from app.tasks.chat.stream_connector_search_results import ( + stream_connector_search_results, +) +from app.tasks.chat.stream_new_chat import stream_new_chat from app.users import current_active_user from app.utils.rbac import check_permission from app.utils.validators import ( @@ -152,6 +157,87 @@ async def handle_chat_data( return response +@router.post("/new_chat") +async def handle_new_chat( + request: NewChatRequest, + session: AsyncSession = Depends(get_async_session), + user: User = Depends(current_active_user), +): + """ + Handle new chat requests using the SurfSense deep agent. + + This endpoint uses the new deep agent with the Vercel AI SDK + Data Stream Protocol (SSE format). 
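+
+    Example request body for POST /new_chat (illustrative values):
+
+        {"chat_id": 12, "user_query": "What are my notes from last week?", "search_space_id": 5}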
+ + Args: + request: NewChatRequest containing chat_id, user_query, and search_space_id + session: Database session + user: Current authenticated user + + Returns: + StreamingResponse with SSE formatted data + """ + # Validate the user query + if not request.user_query or not request.user_query.strip(): + raise HTTPException(status_code=400, detail="User query cannot be empty") + + # Check if the user has chat access to the search space + try: + await check_permission( + session, + user, + request.search_space_id, + Permission.CHATS_CREATE.value, + "You don't have permission to use chat in this search space", + ) + except HTTPException: + raise HTTPException( + status_code=403, detail="You don't have access to this search space" + ) from None + + # Get LLM config ID from search space preferences (optional enhancement) + # For now, we use the default global config (-1) + llm_config_id = -1 + + # Optionally load LLM preferences from search space + try: + search_space_result = await session.execute( + select(SearchSpace).filter(SearchSpace.id == request.search_space_id) + ) + search_space = search_space_result.scalars().first() + + if search_space: + # Use strategic_llm_id if available, otherwise fall back to fast_llm_id + if search_space.strategic_llm_id is not None: + llm_config_id = search_space.strategic_llm_id + elif search_space.fast_llm_id is not None: + llm_config_id = search_space.fast_llm_id + except Exception: + # Fall back to default config on any error + pass + + # Create the streaming response + # chat_id is used as LangGraph's thread_id for automatic chat history management + response = StreamingResponse( + stream_new_chat( + user_query=request.user_query.strip(), + user_id=user.id, + search_space_id=request.search_space_id, + chat_id=request.chat_id, + session=session, + llm_config_id=llm_config_id, + ), + media_type="text/event-stream", + ) + + # Set the required headers for Vercel AI SDK + headers = VercelStreamingService.get_response_headers() + for key, value in headers.items(): + response.headers[key] = value + + return response + + @router.post("/chats", response_model=ChatRead) async def create_chat( chat: ChatCreate, diff --git a/surfsense_backend/app/schemas/__init__.py b/surfsense_backend/app/schemas/__init__.py index d48d1b7f3..a4308f6a2 100644 --- a/surfsense_backend/app/schemas/__init__.py +++ b/surfsense_backend/app/schemas/__init__.py @@ -6,6 +6,7 @@ from .chats import ( ChatRead, ChatReadWithoutMessages, ChatUpdate, + NewChatRequest, ) from .chunks import ChunkBase, ChunkCreate, ChunkRead, ChunkUpdate from .documents import ( @@ -97,6 +98,7 @@ __all__ = [ "MembershipRead", "MembershipReadWithUser", "MembershipUpdate", + "NewChatRequest", "PaginatedResponse", "PermissionInfo", "PermissionsListResponse", diff --git a/surfsense_backend/app/schemas/chats.py b/surfsense_backend/app/schemas/chats.py index 64ce73e9e..6c281afbc 100644 --- a/surfsense_backend/app/schemas/chats.py +++ b/surfsense_backend/app/schemas/chats.py @@ -48,6 +48,14 @@ class AISDKChatRequest(BaseModel): data: dict[str, Any] | None = None +class NewChatRequest(BaseModel): + """Request schema for the new deep agent chat endpoint.""" + + chat_id: int + user_query: str + search_space_id: int + + class ChatCreate(ChatBase): pass diff --git a/surfsense_backend/app/services/connector_service.py b/surfsense_backend/app/services/connector_service.py index 61bc08b40..3a6dcc605 100644 --- a/surfsense_backend/app/services/connector_service.py +++ b/surfsense_backend/app/services/connector_service.py @@ 
-252,24 +252,28 @@ class ConnectorService: # Get more results from each retriever for better fusion retriever_top_k = top_k * 2 - # Run both searches in parallel - chunk_results, doc_results = await asyncio.gather( - self.chunk_retriever.hybrid_search( - query_text=query_text, - top_k=retriever_top_k, - search_space_id=search_space_id, - document_type=document_type, - start_date=start_date, - end_date=end_date, - ), - self.document_retriever.hybrid_search( - query_text=query_text, - top_k=retriever_top_k, - search_space_id=search_space_id, - document_type=document_type, - start_date=start_date, - end_date=end_date, - ), + # IMPORTANT: + # These retrievers share the same AsyncSession. AsyncSession does not permit + # concurrent awaits that require DB IO on the same session/connection. + # Running these in parallel can raise: + # "This session is provisioning a new connection; concurrent operations are not permitted" + # + # So we run them sequentially. + chunk_results = await self.chunk_retriever.hybrid_search( + query_text=query_text, + top_k=retriever_top_k, + search_space_id=search_space_id, + document_type=document_type, + start_date=start_date, + end_date=end_date, + ) + doc_results = await self.document_retriever.hybrid_search( + query_text=query_text, + top_k=retriever_top_k, + search_space_id=search_space_id, + document_type=document_type, + start_date=start_date, + end_date=end_date, ) # Helper to extract document_id from our doc-grouped result @@ -2432,7 +2436,6 @@ class ConnectorService: async def search_bookstack( self, user_query: str, - user_id: str, search_space_id: int, top_k: int = 20, start_date: datetime | None = None, diff --git a/surfsense_backend/app/services/new_streaming_service.py b/surfsense_backend/app/services/new_streaming_service.py new file mode 100644 index 000000000..f0f05cdb6 --- /dev/null +++ b/surfsense_backend/app/services/new_streaming_service.py @@ -0,0 +1,699 @@ +""" +Vercel AI SDK Data Stream Protocol Implementation + +This module implements the Vercel AI SDK streaming protocol for use with +@ai-sdk/react's useChat and useCompletion hooks. + +Protocol Reference: +- Uses Server-Sent Events (SSE) format +- Requires 'x-vercel-ai-ui-message-stream: v1' header +- Supports text, reasoning, sources, files, tools, data, and error parts +""" + +import json +import uuid +from dataclasses import dataclass, field +from typing import Any + + +def generate_id() -> str: + """Generate a unique ID for stream parts.""" + return f"msg_{uuid.uuid4().hex}" + + +@dataclass +class StreamContext: + """ + Maintains context for streaming operations. + Tracks active text and reasoning blocks. + """ + + message_id: str = field(default_factory=generate_id) + active_text_id: str | None = None + active_reasoning_id: str | None = None + step_count: int = 0 + + +class VercelStreamingService: + """ + Implements the Vercel AI SDK Data Stream Protocol. + + This service formats messages according to the SSE-based protocol + that the AI SDK frontend expects. 
All messages are formatted as: + data: {json_object}\n\n + + Usage: + service = VercelStreamingService() + + # Start a message + yield service.format_message_start() + + # Stream text content + text_id = service.generate_text_id() + yield service.format_text_start(text_id) + yield service.format_text_delta(text_id, "Hello, ") + yield service.format_text_delta(text_id, "world!") + yield service.format_text_end(text_id) + + # Finish the message + yield service.format_finish() + yield service.format_done() + """ + + def __init__(self): + self.context = StreamContext() + + @staticmethod + def get_response_headers() -> dict[str, str]: + """ + Get the required HTTP headers for Vercel AI SDK streaming. + + Returns: + dict: Headers to include in the streaming response + """ + return { + "Content-Type": "text/event-stream", + "Cache-Control": "no-cache", + "Connection": "keep-alive", + "x-vercel-ai-ui-message-stream": "v1", + } + + @staticmethod + def _format_sse(data: Any) -> str: + """ + Format data as a Server-Sent Event. + + Args: + data: The data to format (will be JSON serialized if not a string) + + Returns: + str: SSE formatted string + """ + if isinstance(data, str): + return f"data: {data}\n\n" + return f"data: {json.dumps(data)}\n\n" + + @staticmethod + def generate_text_id() -> str: + """Generate a unique ID for a text block.""" + return f"text_{uuid.uuid4().hex}" + + @staticmethod + def generate_reasoning_id() -> str: + """Generate a unique ID for a reasoning block.""" + return f"reasoning_{uuid.uuid4().hex}" + + @staticmethod + def generate_tool_call_id() -> str: + """Generate a unique ID for a tool call.""" + return f"call_{uuid.uuid4().hex}" + + # ========================================================================= + # Message Lifecycle Parts + # ========================================================================= + + def format_message_start(self, message_id: str | None = None) -> str: + """ + Format the start of a new message. + + Args: + message_id: Optional custom message ID. If not provided, one is generated. + + Returns: + str: SSE formatted message start part + + Example output: + data: {"type":"start","messageId":"msg_abc123"} + """ + if message_id: + self.context.message_id = message_id + else: + self.context.message_id = generate_id() + + return self._format_sse({"type": "start", "messageId": self.context.message_id}) + + def format_finish(self) -> str: + """ + Format the finish message part. + + Returns: + str: SSE formatted finish part + + Example output: + data: {"type":"finish"} + """ + return self._format_sse({"type": "finish"}) + + def format_done(self) -> str: + """ + Format the stream termination marker. + + This should be the last thing sent in a stream. + + Returns: + str: SSE formatted done marker + + Example output: + data: [DONE] + """ + return "data: [DONE]\n\n" + + # ========================================================================= + # Text Parts (start/delta/end pattern) + # ========================================================================= + + def format_text_start(self, text_id: str | None = None) -> str: + """ + Format the start of a text block. + + Args: + text_id: Optional custom text block ID. If not provided, one is generated. 
+ + Returns: + str: SSE formatted text start part + + Example output: + data: {"type":"text-start","id":"text_abc123"} + """ + if text_id is None: + text_id = self.generate_text_id() + self.context.active_text_id = text_id + return self._format_sse({"type": "text-start", "id": text_id}) + + def format_text_delta(self, text_id: str, delta: str) -> str: + """ + Format a text delta (incremental content). + + Args: + text_id: The text block ID + delta: The incremental text content + + Returns: + str: SSE formatted text delta part + + Example output: + data: {"type":"text-delta","id":"text_abc123","delta":"Hello"} + """ + return self._format_sse({"type": "text-delta", "id": text_id, "delta": delta}) + + def format_text_end(self, text_id: str) -> str: + """ + Format the end of a text block. + + Args: + text_id: The text block ID + + Returns: + str: SSE formatted text end part + + Example output: + data: {"type":"text-end","id":"text_abc123"} + """ + if self.context.active_text_id == text_id: + self.context.active_text_id = None + return self._format_sse({"type": "text-end", "id": text_id}) + + def stream_text(self, text_id: str, text: str, chunk_size: int = 10) -> list[str]: + """ + Convenience method to stream text in chunks. + + Args: + text_id: The text block ID + text: The full text to stream + chunk_size: Size of each chunk (default 10 characters) + + Returns: + list[str]: List of SSE formatted text delta parts + """ + parts = [] + for i in range(0, len(text), chunk_size): + chunk = text[i : i + chunk_size] + parts.append(self.format_text_delta(text_id, chunk)) + return parts + + # ========================================================================= + # Reasoning Parts (start/delta/end pattern) + # ========================================================================= + + def format_reasoning_start(self, reasoning_id: str | None = None) -> str: + """ + Format the start of a reasoning block. + + Args: + reasoning_id: Optional custom reasoning block ID. + + Returns: + str: SSE formatted reasoning start part + + Example output: + data: {"type":"reasoning-start","id":"reasoning_abc123"} + """ + if reasoning_id is None: + reasoning_id = self.generate_reasoning_id() + self.context.active_reasoning_id = reasoning_id + return self._format_sse({"type": "reasoning-start", "id": reasoning_id}) + + def format_reasoning_delta(self, reasoning_id: str, delta: str) -> str: + """ + Format a reasoning delta (incremental reasoning content). + + Args: + reasoning_id: The reasoning block ID + delta: The incremental reasoning content + + Returns: + str: SSE formatted reasoning delta part + + Example output: + data: {"type":"reasoning-delta","id":"reasoning_abc123","delta":"Let me think..."} + """ + return self._format_sse( + {"type": "reasoning-delta", "id": reasoning_id, "delta": delta} + ) + + def format_reasoning_end(self, reasoning_id: str) -> str: + """ + Format the end of a reasoning block. 
+ + Args: + reasoning_id: The reasoning block ID + + Returns: + str: SSE formatted reasoning end part + + Example output: + data: {"type":"reasoning-end","id":"reasoning_abc123"} + """ + if self.context.active_reasoning_id == reasoning_id: + self.context.active_reasoning_id = None + return self._format_sse({"type": "reasoning-end", "id": reasoning_id}) + + # ========================================================================= + # Source Parts + # ========================================================================= + + def format_source_url( + self, url: str, source_id: str | None = None, title: str | None = None + ) -> str: + """ + Format a source URL reference. + + Args: + url: The source URL + source_id: Optional source identifier (defaults to URL) + title: Optional title for the source + + Returns: + str: SSE formatted source URL part + + Example output: + data: {"type":"source-url","sourceId":"https://example.com","url":"https://example.com"} + """ + data: dict[str, Any] = { + "type": "source-url", + "sourceId": source_id or url, + "url": url, + } + if title: + data["title"] = title + return self._format_sse(data) + + def format_source_document( + self, + source_id: str, + media_type: str = "file", + title: str | None = None, + description: str | None = None, + ) -> str: + """ + Format a source document reference. + + Args: + source_id: The source identifier + media_type: The media type (e.g., "file", "pdf", "document") + title: Optional title for the document + description: Optional description + + Returns: + str: SSE formatted source document part + + Example output: + data: {"type":"source-document","sourceId":"doc_123","mediaType":"file","title":"Report"} + """ + data: dict[str, Any] = { + "type": "source-document", + "sourceId": source_id, + "mediaType": media_type, + } + if title: + data["title"] = title + if description: + data["description"] = description + return self._format_sse(data) + + def format_sources(self, sources: list[dict[str, Any]]) -> list[str]: + """ + Format multiple sources. + + Args: + sources: List of source objects with 'url', 'title', 'type' fields + + Returns: + list[str]: List of SSE formatted source parts + """ + parts = [] + for source in sources: + url = source.get("url") + if url: + parts.append( + self.format_source_url( + url=url, + source_id=source.get("id", url), + title=source.get("title"), + ) + ) + else: + parts.append( + self.format_source_document( + source_id=source.get("id", ""), + media_type=source.get("type", "file"), + title=source.get("title"), + description=source.get("description"), + ) + ) + return parts + + # ========================================================================= + # File Part + # ========================================================================= + + def format_file(self, url: str, media_type: str) -> str: + """ + Format a file reference. + + Args: + url: The file URL + media_type: The MIME type (e.g., "image/png", "application/pdf") + + Returns: + str: SSE formatted file part + + Example output: + data: {"type":"file","url":"https://example.com/file.png","mediaType":"image/png"} + """ + return self._format_sse({"type": "file", "url": url, "mediaType": media_type}) + + # ========================================================================= + # Custom Data Parts + # ========================================================================= + + def format_data(self, data_type: str, data: Any) -> str: + """ + Format custom data with a type-specific suffix. 
+ + The type will be prefixed with 'data-' automatically. + + Args: + data_type: The custom data type suffix (e.g., "weather", "chart") + data: The data payload + + Returns: + str: SSE formatted data part + + Example output: + data: {"type":"data-weather","data":{"location":"SF","temperature":100}} + """ + return self._format_sse({"type": f"data-{data_type}", "data": data}) + + def format_terminal_info(self, text: str, message_type: str = "info") -> str: + """ + Format terminal info as custom data (SurfSense specific). + + Args: + text: The terminal message text + message_type: The message type (info, error, success, warning) + + Returns: + str: SSE formatted terminal info data part + """ + return self.format_data("terminal-info", {"text": text, "type": message_type}) + + def format_further_questions(self, questions: list[str]) -> str: + """ + Format further questions as custom data (SurfSense specific). + + Args: + questions: List of suggested follow-up questions + + Returns: + str: SSE formatted further questions data part + """ + return self.format_data("further-questions", {"questions": questions}) + + # ========================================================================= + # Error Part + # ========================================================================= + + def format_error(self, error_text: str) -> str: + """ + Format an error message. + + Args: + error_text: The error message text + + Returns: + str: SSE formatted error part + + Example output: + data: {"type":"error","errorText":"Something went wrong"} + """ + return self._format_sse({"type": "error", "errorText": error_text}) + + # ========================================================================= + # Tool Parts + # ========================================================================= + + def format_tool_input_start(self, tool_call_id: str, tool_name: str) -> str: + """ + Format the start of tool input streaming. + + Args: + tool_call_id: The unique tool call identifier + tool_name: The name of the tool being called + + Returns: + str: SSE formatted tool input start part + + Example output: + data: {"type":"tool-input-start","toolCallId":"call_abc123","toolName":"getWeather"} + """ + return self._format_sse( + { + "type": "tool-input-start", + "toolCallId": tool_call_id, + "toolName": tool_name, + } + ) + + def format_tool_input_delta(self, tool_call_id: str, input_text_delta: str) -> str: + """ + Format incremental tool input. + + Args: + tool_call_id: The tool call identifier + input_text_delta: The incremental input text + + Returns: + str: SSE formatted tool input delta part + + Example output: + data: {"type":"tool-input-delta","toolCallId":"call_abc123","inputTextDelta":"San Fran"} + """ + return self._format_sse( + { + "type": "tool-input-delta", + "toolCallId": tool_call_id, + "inputTextDelta": input_text_delta, + } + ) + + def format_tool_input_available( + self, tool_call_id: str, tool_name: str, input_data: dict[str, Any] + ) -> str: + """ + Format the completion of tool input. 
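+
+        Emitted once the complete argument payload is known, after any
+        tool-input-delta parts for the same toolCallId.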
+ + Args: + tool_call_id: The tool call identifier + tool_name: The name of the tool + input_data: The complete tool input parameters + + Returns: + str: SSE formatted tool input available part + + Example output: + data: {"type":"tool-input-available","toolCallId":"call_abc123","toolName":"getWeather","input":{"city":"SF"}} + """ + return self._format_sse( + { + "type": "tool-input-available", + "toolCallId": tool_call_id, + "toolName": tool_name, + "input": input_data, + } + ) + + def format_tool_output_available(self, tool_call_id: str, output: Any) -> str: + """ + Format tool execution output. + + Args: + tool_call_id: The tool call identifier + output: The tool execution result + + Returns: + str: SSE formatted tool output available part + + Example output: + data: {"type":"tool-output-available","toolCallId":"call_abc123","output":{"weather":"sunny"}} + """ + return self._format_sse( + { + "type": "tool-output-available", + "toolCallId": tool_call_id, + "output": output, + } + ) + + # ========================================================================= + # Step Parts + # ========================================================================= + + def format_start_step(self) -> str: + """ + Format the start of a step (one LLM API call). + + Returns: + str: SSE formatted start step part + + Example output: + data: {"type":"start-step"} + """ + self.context.step_count += 1 + return self._format_sse({"type": "start-step"}) + + def format_finish_step(self) -> str: + """ + Format the completion of a step. + + This is necessary for correctly processing multiple stitched + assistant calls, e.g., when calling tools in the backend. + + Returns: + str: SSE formatted finish step part + + Example output: + data: {"type":"finish-step"} + """ + return self._format_sse({"type": "finish-step"}) + + # ========================================================================= + # Convenience Methods + # ========================================================================= + + def stream_full_text(self, text: str, chunk_size: int = 10) -> list[str]: + """ + Convenience method to stream a complete text block. + + Generates: text-start, text-deltas, text-end + + Args: + text: The full text to stream + chunk_size: Size of each chunk + + Returns: + list[str]: List of all SSE formatted parts + """ + text_id = self.generate_text_id() + parts = [self.format_text_start(text_id)] + parts.extend(self.stream_text(text_id, text, chunk_size)) + parts.append(self.format_text_end(text_id)) + return parts + + def stream_full_reasoning(self, reasoning: str, chunk_size: int = 20) -> list[str]: + """ + Convenience method to stream a complete reasoning block. + + Generates: reasoning-start, reasoning-deltas, reasoning-end + + Args: + reasoning: The full reasoning text + chunk_size: Size of each chunk + + Returns: + list[str]: List of all SSE formatted parts + """ + reasoning_id = self.generate_reasoning_id() + parts = [self.format_reasoning_start(reasoning_id)] + for i in range(0, len(reasoning), chunk_size): + chunk = reasoning[i : i + chunk_size] + parts.append(self.format_reasoning_delta(reasoning_id, chunk)) + parts.append(self.format_reasoning_end(reasoning_id)) + return parts + + def create_complete_response( + self, + text: str, + sources: list[dict[str, Any]] | None = None, + reasoning: str | None = None, + further_questions: list[str] | None = None, + chunk_size: int = 10, + ) -> list[str]: + """ + Create a complete streaming response with all parts. 
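+
+        Example usage (a minimal sketch; the payload values are
+        illustrative):
+
+            service = VercelStreamingService()
+            for part in service.create_complete_response(
+                "Paris is the capital of France.",
+                sources=[{"url": "https://example.com", "title": "Example"}],
+                further_questions=["What is its population?"],
+            ):
+                ...  # write each SSE part to the response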
+ + This is a convenience method that generates a full response + including message start, optional reasoning, text, sources, + further questions, and finish markers. + + Args: + text: The main response text + sources: Optional list of source references + reasoning: Optional reasoning/thinking content + further_questions: Optional follow-up questions + chunk_size: Size of text chunks + + Returns: + list[str]: List of all SSE formatted parts in correct order + """ + parts = [] + + # Start message + parts.append(self.format_message_start()) + parts.append(self.format_start_step()) + + # Reasoning (if provided) + if reasoning: + parts.extend(self.stream_full_reasoning(reasoning)) + + # Sources (before main text) + if sources: + parts.extend(self.format_sources(sources)) + + # Main text content + parts.extend(self.stream_full_text(text, chunk_size)) + + # Further questions (if provided) + if further_questions: + parts.append(self.format_further_questions(further_questions)) + + # Finish + parts.append(self.format_finish_step()) + parts.append(self.format_finish()) + parts.append(self.format_done()) + + return parts + + def reset(self) -> None: + """Reset the streaming context for a new message.""" + self.context = StreamContext() diff --git a/surfsense_backend/app/tasks/stream_connector_search_results.py b/surfsense_backend/app/tasks/chat/stream_connector_search_results.py similarity index 100% rename from surfsense_backend/app/tasks/stream_connector_search_results.py rename to surfsense_backend/app/tasks/chat/stream_connector_search_results.py diff --git a/surfsense_backend/app/tasks/chat/stream_new_chat.py b/surfsense_backend/app/tasks/chat/stream_new_chat.py new file mode 100644 index 000000000..04f3f97c3 --- /dev/null +++ b/surfsense_backend/app/tasks/chat/stream_new_chat.py @@ -0,0 +1,210 @@ +""" +Streaming task for the new SurfSense deep agent chat. + +This module streams responses from the deep agent using the Vercel AI SDK +Data Stream Protocol (SSE format). +""" + +from collections.abc import AsyncGenerator +from uuid import UUID + +from langchain_core.messages import HumanMessage +from sqlalchemy.ext.asyncio import AsyncSession + +from app.agents.new_chat.chat_deepagent import ( + create_chat_litellm_from_config, + create_surfsense_deep_agent, + load_llm_config_from_yaml, +) +from app.services.connector_service import ConnectorService +from app.services.new_streaming_service import VercelStreamingService + + +async def stream_new_chat( + user_query: str, + user_id: str | UUID, + search_space_id: int, + chat_id: int, + session: AsyncSession, + llm_config_id: int = -1, +) -> AsyncGenerator[str, None]: + """ + Stream chat responses from the new SurfSense deep agent. + + This uses the Vercel AI SDK Data Stream Protocol (SSE format) for streaming. + The chat_id is used as LangGraph's thread_id for memory/checkpointing, + so chat history is automatically managed by LangGraph. 
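+
+    Each yielded string is a complete, already-formatted SSE line, so a
+    caller can forward it verbatim. A minimal sketch of such a caller
+    (assuming FastAPI; ChatBody and get_session here are illustrative
+    names, not part of this module):
+
+        @app.post("/chat/{chat_id}/stream")
+        async def chat_stream(
+            chat_id: int,
+            body: ChatBody,
+            session: AsyncSession = Depends(get_session),
+        ):
+            return StreamingResponse(
+                stream_new_chat(
+                    user_query=body.query,
+                    user_id=body.user_id,
+                    search_space_id=body.search_space_id,
+                    chat_id=chat_id,
+                    session=session,
+                ),
+                media_type="text/event-stream",
+            )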
+
+    Args:
+        user_query: The user's query
+        user_id: The user's ID (can be UUID object or string)
+        search_space_id: The search space ID
+        chat_id: The chat ID (used as LangGraph thread_id for memory)
+        session: The database session
+        llm_config_id: The LLM configuration ID (default: -1 for first global config)
+
+    Yields:
+        str: SSE formatted response strings
+    """
+    streaming_service = VercelStreamingService()
+
+    # Normalize the user ID to a string (it may arrive as a UUID object)
+    user_id = str(user_id) if isinstance(user_id, UUID) else user_id
+
+    # Track the current text block for streaming (defined early for exception handling)
+    current_text_id: str | None = None
+
+    try:
+        # Load LLM config
+        llm_config = load_llm_config_from_yaml(llm_config_id=llm_config_id)
+        if not llm_config:
+            yield streaming_service.format_error(
+                f"Failed to load LLM config with id {llm_config_id}"
+            )
+            yield streaming_service.format_done()
+            return
+
+        # Create ChatLiteLLM instance
+        llm = create_chat_litellm_from_config(llm_config)
+        if not llm:
+            yield streaming_service.format_error("Failed to create LLM instance")
+            yield streaming_service.format_done()
+            return
+
+        # Create connector service
+        connector_service = ConnectorService(session, search_space_id=search_space_id)
+
+        # Create the deep agent
+        agent = create_surfsense_deep_agent(
+            llm=llm,
+            search_space_id=search_space_id,
+            db_session=session,
+            connector_service=connector_service,
+        )
+
+        # Build input with just the current user query
+        # Chat history is managed by LangGraph via thread_id
+        input_state = {
+            "messages": [HumanMessage(content=user_query)],
+            "search_space_id": search_space_id,
+        }
+
+        # Configure LangGraph with thread_id for memory
+        config = {
+            "configurable": {
+                "thread_id": str(chat_id),
+            }
+        }
+
+        # Start the message stream
+        yield streaming_service.format_message_start()
+        yield streaming_service.format_start_step()
+
+        # Accumulate the streamed text for this message
+        accumulated_text = ""
+
+        # Stream the agent response with thread config for memory
+        async for event in agent.astream_events(
+            input_state, config=config, version="v2"
+        ):
+            event_type = event.get("event", "")
+
+            # Handle chat model stream events (text streaming)
+            if event_type == "on_chat_model_stream":
+                chunk = event.get("data", {}).get("chunk")
+                if chunk and hasattr(chunk, "content"):
+                    content = chunk.content
+                    if content and isinstance(content, str):
+                        # Start a new text block if needed
+                        if current_text_id is None:
+                            current_text_id = streaming_service.generate_text_id()
+                            yield streaming_service.format_text_start(current_text_id)
+
+                        # Stream the text delta
+                        yield streaming_service.format_text_delta(
+                            current_text_id, content
+                        )
+                        accumulated_text += content
+
+            # Handle tool calls
+            elif event_type == "on_tool_start":
+                tool_name = event.get("name", "unknown_tool")
+                run_id = event.get("run_id", "")
+                tool_input = event.get("data", {}).get("input", {})
+
+                # End current text block if any
+                if current_text_id is not None:
+                    yield streaming_service.format_text_end(current_text_id)
+                    current_text_id = None
+
+                # Stream tool info
+                tool_call_id = (
+                    f"call_{run_id[:32]}"
+                    if run_id
+                    else streaming_service.generate_tool_call_id()
+                )
+                yield streaming_service.format_tool_input_start(tool_call_id, tool_name)
+                yield streaming_service.format_tool_input_available(
+                    tool_call_id,
+                    tool_name,
+                    tool_input
+                    if isinstance(tool_input, dict)
+                    else {"input": tool_input},
+                )
+
+                # Send terminal info about the tool call
+                if tool_name == "search_knowledge_base":
+                    query = (
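+                        # tool_input is normally the tool's kwargs dict, but some event
+                        # payloads surface it as a plain string, so both shapes are
+                        # handled before the preview is truncated below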
tool_input.get("query", "") + if isinstance(tool_input, dict) + else str(tool_input) + ) + yield streaming_service.format_terminal_info( + f"Searching knowledge base: {query[:100]}{'...' if len(query) > 100 else ''}", + "info", + ) + + elif event_type == "on_tool_end": + run_id = event.get("run_id", "") + tool_output = event.get("data", {}).get("output", "") + + tool_call_id = f"call_{run_id[:32]}" if run_id else "call_unknown" + + # Don't stream the full output (can be very large), just acknowledge + yield streaming_service.format_tool_output_available( + tool_call_id, + {"status": "completed", "result_length": len(str(tool_output))}, + ) + + yield streaming_service.format_terminal_info( + "Knowledge base search completed", "success" + ) + + # Handle chain/agent end to close any open text blocks + elif event_type in ("on_chain_end", "on_agent_end"): + if current_text_id is not None: + yield streaming_service.format_text_end(current_text_id) + current_text_id = None + + # Ensure text block is closed + if current_text_id is not None: + yield streaming_service.format_text_end(current_text_id) + + # Finish the step and message + yield streaming_service.format_finish_step() + yield streaming_service.format_finish() + yield streaming_service.format_done() + + except Exception as e: + # Handle any errors + error_message = f"Error during chat: {e!s}" + print(f"[stream_new_chat] {error_message}") + + # Close any open text block + if current_text_id is not None: + yield streaming_service.format_text_end(current_text_id) + + yield streaming_service.format_error(error_message) + yield streaming_service.format_finish_step() + yield streaming_service.format_finish() + yield streaming_service.format_done() diff --git a/surfsense_backend/app/tasks/connector_indexers/webcrawler_indexer.py b/surfsense_backend/app/tasks/connector_indexers/webcrawler_indexer.py index fe18f2d19..3d5fc77b0 100644 --- a/surfsense_backend/app/tasks/connector_indexers/webcrawler_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/webcrawler_indexer.py @@ -177,7 +177,7 @@ async def index_crawled_urls( documents_skipped += 1 continue - # Format content as structured document + # Format content as structured document for summary generation (includes all metadata) structured_document = crawler.format_to_structured_document( crawl_result ) @@ -187,10 +187,14 @@ async def index_crawled_urls( DocumentType.CRAWLED_URL, url, search_space_id ) - # Generate content hash - # TODO: To fix this by not including dynamic content like date, time, etc. + # Generate content hash using a version WITHOUT metadata + # This ensures the hash only changes when actual content changes, + # not when metadata (which contains dynamic fields like timestamps, IDs, etc.) 
changes + structured_document_for_hash = crawler.format_to_structured_document( + crawl_result, exclude_metadata=True + ) content_hash = generate_content_hash( - structured_document, search_space_id + structured_document_for_hash, search_space_id ) # Check if document with this unique identifier already exists diff --git a/surfsense_backend/pyproject.toml b/surfsense_backend/pyproject.toml index 130a1556c..d2a18f671 100644 --- a/surfsense_backend/pyproject.toml +++ b/surfsense_backend/pyproject.toml @@ -51,6 +51,9 @@ dependencies = [ "litellm>=1.80.10", "langchain-litellm>=0.3.5", "langgraph>=1.0.5", + "fake-useragent>=2.2.0", + "deepagents>=0.3.0", + "trafilatura>=2.0.0", ] [dependency-groups] diff --git a/surfsense_backend/uv.lock b/surfsense_backend/uv.lock index e681c3048..7ef81b4d8 100644 --- a/surfsense_backend/uv.lock +++ b/surfsense_backend/uv.lock @@ -180,6 +180,25 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643 }, ] +[[package]] +name = "anthropic" +version = "0.75.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "distro" }, + { name = "docstring-parser" }, + { name = "httpx" }, + { name = "jiter" }, + { name = "pydantic" }, + { name = "sniffio" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/04/1f/08e95f4b7e2d35205ae5dcbb4ae97e7d477fc521c275c02609e2931ece2d/anthropic-0.75.0.tar.gz", hash = "sha256:e8607422f4ab616db2ea5baacc215dd5f028da99ce2f022e33c7c535b29f3dfb", size = 439565 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/60/1c/1cd02b7ae64302a6e06724bf80a96401d5313708651d277b1458504a1730/anthropic-0.75.0-py3-none-any.whl", hash = "sha256:ea8317271b6c15d80225a9f3c670152746e88805a7a61e14d4a374577164965b", size = 388164 }, +] + [[package]] name = "antlr4-python3-runtime" version = "4.9.3" @@ -383,6 +402,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/83/7b/5652771e24fff12da9dde4c20ecf4682e606b104f26419d139758cc935a6/azure_identity-1.25.1-py3-none-any.whl", hash = "sha256:e9edd720af03dff020223cd269fa3a61e8f345ea75443858273bcb44844ab651", size = 191317 }, ] +[[package]] +name = "babel" +version = "2.17.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/7d/6b/d52e42361e1aa00709585ecc30b3f9684b3ab62530771402248b1b1d6240/babel-2.17.0.tar.gz", hash = "sha256:0c54cffb19f690cdcc52a3b50bcbf71e07a808d1c80d549f2459b9d2cf0afb9d", size = 9951852, upload-time = "2025-02-01T15:17:41.026Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b7/b8/3fe70c75fe32afc4bb507f75563d39bc5642255d1d94f1f23604725780bf/babel-2.17.0-py3-none-any.whl", hash = "sha256:4d0b53093fdfb4b21c92b5213dba5a1b23885afa8383709427046b21c366e5f2", size = 10182537, upload-time = "2025-02-01T15:17:37.39Z" }, +] + [[package]] name = "backoff" version = "2.2.1" @@ -537,6 +565,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/1c/24/a4301564a979368d6f3644f47acc921450b5524b8846e827237d98b04746/botocore-1.42.8-py3-none-any.whl", hash = "sha256:4cb89c74dd9083d16e45868749b999265a91309b2499907c84adeffa0a8df89b", size = 14534173 }, ] +[[package]] +name = "bracex" +version = "2.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/63/9a/fec38644694abfaaeca2798b58e276a8e61de49e2e37494ace423395febc/bracex-2.6.tar.gz", hash = "sha256:98f1347cd77e22ee8d967a30ad4e310b233f7754dbf31ff3fceb76145ba47dc7", size = 26642 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9d/2a/9186535ce58db529927f6cf5990a849aa9e052eea3e2cfefe20b9e1802da/bracex-2.6-py3-none-any.whl", hash = "sha256:0b0049264e7340b3ec782b5cb99beb325f36c3782a32e36e876452fd49a09952", size = 11508 }, +] + [[package]] name = "build" version = "1.3.0" @@ -928,6 +965,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b0/e6/6000d0094e8a5e32ad62591c8609e269febb6e4db83a1c75ff8868b42731/contourpy-1.3.2-cp313-cp313t-win_amd64.whl", hash = "sha256:78e9253c3de756b3f6a5174d024c4835acd59eb3f8e2ca13e775dbffe1558f69", size = 238214 }, ] +[[package]] +name = "courlan" +version = "1.3.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "babel" }, + { name = "tld" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/6f/54/6d6ceeff4bed42e7a10d6064d35ee43a810e7b3e8beb4abeae8cff4713ae/courlan-1.3.2.tar.gz", hash = "sha256:0b66f4db3a9c39a6e22dd247c72cfaa57d68ea660e94bb2c84ec7db8712af190", size = 206382, upload-time = "2024-10-29T16:40:20.994Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8e/ca/6a667ccbe649856dcd3458bab80b016681b274399d6211187c6ab969fc50/courlan-1.3.2-py3-none-any.whl", hash = "sha256:d0dab52cf5b5b1000ee2839fbc2837e93b2514d3cb5bb61ae158a55b7a04c6be", size = 33848, upload-time = "2024-10-29T16:40:18.325Z" }, +] + [[package]] name = "cryptography" version = "45.0.4" @@ -1073,6 +1124,36 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c3/be/d0d44e092656fe7a06b55e6103cbce807cdbdee17884a5367c68c9860853/dataclasses_json-0.6.7-py3-none-any.whl", hash = "sha256:0dbf33f26c8d5305befd61b39d2b3414e8a407bedc2834dea9b8d642666fb40a", size = 28686 }, ] +[[package]] +name = "deepagents" +version = "0.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "langchain" }, + { name = "langchain-anthropic" }, + { name = "langchain-core" }, + { name = "wcmatch" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/cc/c7/d3c2840bd0e66b6cd5948aa69625e129328ad261308e18fcb9a9420709da/deepagents-0.3.0.tar.gz", hash = "sha256:3dd4d2ed53efb1ef78aeb1020a5696c0ec7e58e627b305a6665d33fe6fbdedff", size = 51387 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/22/e9/60bab7f37ff38bf982ea578e457ed1878ded613a3425462bcd07b00487e9/deepagents-0.3.0-py3-none-any.whl", hash = "sha256:9e23532d8d535dc2b0b4e0834453a1223a6a8f81b77947c0faf54537d05ce89a", size = 54065 }, +] + +[[package]] +name = "dateparser" +version = "1.2.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "python-dateutil" }, + { name = "pytz" }, + { name = "regex" }, + { name = "tzlocal" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a9/30/064144f0df1749e7bb5faaa7f52b007d7c2d08ec08fed8411aba87207f68/dateparser-1.2.2.tar.gz", hash = "sha256:986316f17cb8cdc23ea8ce563027c5ef12fc725b6fb1d137c14ca08777c5ecf7", size = 329840, upload-time = "2025-06-26T09:29:23.211Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/87/22/f020c047ae1346613db9322638186468238bcfa8849b4668a22b97faad65/dateparser-1.2.2-py3-none-any.whl", hash = "sha256:5a5d7211a09013499867547023a2a0c91d5a27d15dd4dbcea676ea9fe66f2482", size = 315453, upload-time = "2025-06-26T09:29:21.412Z" }, +] + [[package]] 
name = "defusedxml" version = "0.7.1" @@ -1284,6 +1365,15 @@ version = "0.6.2" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/a2/55/8f8cab2afd404cf578136ef2cc5dfb50baa1761b68c9da1fb1e4eed343c9/docopt-0.6.2.tar.gz", hash = "sha256:49b3a825280bd66b3aa83585ef59c4a8c82f2c8a522dbe754a8bc8d08c85c491", size = 25901 } +[[package]] +name = "docstring-parser" +version = "0.17.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b2/9d/c3b43da9515bd270df0f80548d9944e389870713cc1fe2b8fb35fe2bcefd/docstring_parser-0.17.0.tar.gz", hash = "sha256:583de4a309722b3315439bb31d64ba3eebada841f2e2cee23b99df001434c912", size = 27442 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/55/e2/2537ebcff11c1ee1ff17d8d0b6f4db75873e3b0fb32c2d4a2ee31ecb310a/docstring_parser-0.17.0-py3-none-any.whl", hash = "sha256:cf2569abd23dce8099b300f9b4fa8191e9582dda731fd533daf54c4551658708", size = 36896 }, +] + [[package]] name = "docutils" version = "0.21.2" @@ -1419,6 +1509,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c1/8b/5fe2cc11fee489817272089c4203e679c63b570a5aaeb18d852ae3cbba6a/et_xmlfile-2.0.0-py3-none-any.whl", hash = "sha256:7a91720bc756843502c3b7504c77b8fe44217c85c537d85037f0f536151b2caa", size = 18059 }, ] +[[package]] +name = "fake-useragent" +version = "2.2.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/41/43/948d10bf42735709edb5ae51e23297d034086f17fc7279fef385a7acb473/fake_useragent-2.2.0.tar.gz", hash = "sha256:4e6ab6571e40cc086d788523cf9e018f618d07f9050f822ff409a4dfe17c16b2", size = 158898 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/51/37/b3ea9cd5558ff4cb51957caca2193981c6b0ff30bd0d2630ac62505d99d0/fake_useragent-2.2.0-py3-none-any.whl", hash = "sha256:67f35ca4d847b0d298187443aaf020413746e56acd985a611908c73dba2daa24", size = 161695 }, +] + [[package]] name = "fastapi" version = "0.115.9" @@ -2119,6 +2218,22 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6c/dd/a834df6482147d48e225a49515aabc28974ad5a4ca3215c18a882565b028/html5lib-1.1-py2.py3-none-any.whl", hash = "sha256:0d78f8fde1c230e99fe37986a60526d7049ed4bf8a9fadbad5f00e22e58e041d", size = 112173 }, ] +[[package]] +name = "htmldate" +version = "1.9.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "charset-normalizer" }, + { name = "dateparser" }, + { name = "lxml" }, + { name = "python-dateutil" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/9d/10/ead9dabc999f353c3aa5d0dc0835b1e355215a5ecb489a7f4ef2ddad5e33/htmldate-1.9.4.tar.gz", hash = "sha256:1129063e02dd0354b74264de71e950c0c3fcee191178321418ccad2074cc8ed0", size = 44690, upload-time = "2025-11-04T17:46:44.983Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a1/bd/adfcdaaad5805c0c5156aeefd64c1e868c05e9c1cd6fd21751f168cd88c7/htmldate-1.9.4-py3-none-any.whl", hash = "sha256:1b94bcc4e08232a5b692159903acf95548b6a7492dddca5bb123d89d6325921c", size = 31558, upload-time = "2025-11-04T17:46:43.258Z" }, +] + [[package]] name = "httpcore" version = "1.0.9" @@ -2509,6 +2624,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/01/0e/b27cdbaccf30b890c40ed1da9fd4a3593a5cf94dae54fb34f8a4b74fcd3f/jsonschema_specifications-2025.4.1-py3-none-any.whl", hash = "sha256:4653bffbd6584f7de83a67e0d620ef16900b390ddc7939d56684d6c81e33f1af", size = 18437 }, ] +[[package]] +name = 
"justext" +version = "3.0.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "lxml", extra = ["html-clean"] }, +] +sdist = { url = "https://files.pythonhosted.org/packages/49/f3/45890c1b314f0d04e19c1c83d534e611513150939a7cf039664d9ab1e649/justext-3.0.2.tar.gz", hash = "sha256:13496a450c44c4cd5b5a75a5efcd9996066d2a189794ea99a49949685a0beb05", size = 828521, upload-time = "2025-02-25T20:21:49.934Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f2/ac/52f4e86d1924a7fc05af3aeb34488570eccc39b4af90530dd6acecdf16b5/justext-3.0.2-py2.py3-none-any.whl", hash = "sha256:62b1c562b15c3c6265e121cc070874243a443bfd53060e869393f09d6b6cc9a7", size = 837940, upload-time = "2025-02-25T20:21:44.179Z" }, +] + [[package]] name = "keyring" version = "25.6.0" @@ -2650,6 +2777,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/23/00/4e3fa0d90f5a5c376ccb8ca983d0f0f7287783dfac48702e18f01d24673b/langchain-1.2.0-py3-none-any.whl", hash = "sha256:82f0d17aa4fbb11560b30e1e7d4aeb75e3ad71ce09b85c90ab208b181a24ffac", size = 102828 }, ] +[[package]] +name = "langchain-anthropic" +version = "1.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anthropic" }, + { name = "langchain-core" }, + { name = "pydantic" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/de/50/cc3b3e0410d86de457d7a100dde763fc1c33c4ce884e883659aa4cf95538/langchain_anthropic-1.3.0.tar.gz", hash = "sha256:497a937ee0310c588196bff37f39f02d43d87bff3a12d16278bdbc3bd0e9a80b", size = 707207 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/29/ca/0725bc347a9c226da9d76f85bf7d03115caec7dbc87876af68579c4ab24e/langchain_anthropic-1.3.0-py3-none-any.whl", hash = "sha256:3823560e1df15d6082636baa04f87cb59052ba70aada0eba381c4679b1ce0eba", size = 45724 }, +] + [[package]] name = "langchain-community" version = "0.3.31" @@ -3018,6 +3159,23 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fc/14/c115516c62a7d2499781d2d3d7215218c0731b2c940753bf9f9b7b73924d/lxml-5.4.0-cp313-cp313-win_amd64.whl", hash = "sha256:bcb7a1096b4b6b24ce1ac24d4942ad98f983cd3810f9711bcd0293f43a9d8b9f", size = 3814606 }, ] +[package.optional-dependencies] +html-clean = [ + { name = "lxml-html-clean" }, +] + +[[package]] +name = "lxml-html-clean" +version = "0.4.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "lxml" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d9/cb/c9c5bb2a9c47292e236a808dd233a03531f53b626f36259dcd32b49c76da/lxml_html_clean-0.4.3.tar.gz", hash = "sha256:c9df91925b00f836c807beab127aac82575110eacff54d0a75187914f1bd9d8c", size = 21498, upload-time = "2025-10-02T20:49:24.895Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/10/4a/63a9540e3ca73709f4200564a737d63a4c8c9c4dd032bab8535f507c190a/lxml_html_clean-0.4.3-py3-none-any.whl", hash = "sha256:63fd7b0b9c3a2e4176611c2ca5d61c4c07ffca2de76c14059a81a2825833731e", size = 14177, upload-time = "2025-10-02T20:49:23.749Z" }, +] + [[package]] name = "magika" version = "0.6.2" @@ -4321,7 +4479,7 @@ dependencies = [ { name = "pinecone-plugin-interface" }, { name = "python-dateutil" }, { name = "typing-extensions" }, - { name = "urllib3", marker = "python_full_version < '4.0'" }, + { name = "urllib3", marker = "python_full_version < '4'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/fa/38/12731d4af470851b4963eba616605868a8599ef4df51c7b6c928e5f3166d/pinecone-7.3.0.tar.gz", hash = 
"sha256:307edc155621d487c20dc71b76c3ad5d6f799569ba42064190d03917954f9a7b", size = 235256 } wheels = [ @@ -6034,10 +6192,12 @@ dependencies = [ { name = "boto3" }, { name = "celery", extra = ["redis"] }, { name = "chonkie", extra = ["all"] }, + { name = "deepagents" }, { name = "discord-py" }, { name = "docling" }, { name = "elasticsearch" }, { name = "en-core-web-sm" }, + { name = "fake-useragent" }, { name = "fastapi" }, { name = "fastapi-users", extra = ["oauth", "sqlalchemy"] }, { name = "faster-whisper" }, @@ -6070,6 +6230,7 @@ dependencies = [ { name = "spacy" }, { name = "static-ffmpeg" }, { name = "tavily-python" }, + { name = "trafilatura" }, { name = "unstructured", extra = ["all-docs"] }, { name = "unstructured-client" }, { name = "uvicorn", extra = ["standard"] }, @@ -6089,10 +6250,12 @@ requires-dist = [ { name = "boto3", specifier = ">=1.35.0" }, { name = "celery", extras = ["redis"], specifier = ">=5.5.3" }, { name = "chonkie", extras = ["all"], specifier = ">=1.4.0" }, + { name = "deepagents", specifier = ">=0.3.0" }, { name = "discord-py", specifier = ">=2.5.2" }, { name = "docling", specifier = ">=2.15.0" }, { name = "elasticsearch", specifier = ">=9.1.1" }, { name = "en-core-web-sm", url = "https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl" }, + { name = "fake-useragent", specifier = ">=2.2.0" }, { name = "fastapi", specifier = ">=0.115.8" }, { name = "fastapi-users", extras = ["oauth", "sqlalchemy"], specifier = ">=14.0.1" }, { name = "faster-whisper", specifier = ">=1.1.0" }, @@ -6125,6 +6288,7 @@ requires-dist = [ { name = "spacy", specifier = ">=3.8.7" }, { name = "static-ffmpeg", specifier = ">=2.13" }, { name = "tavily-python", specifier = ">=0.3.2" }, + { name = "trafilatura", specifier = ">=2.0.0" }, { name = "unstructured", extras = ["all-docs"], specifier = ">=0.16.25" }, { name = "unstructured-client", specifier = ">=0.30.0" }, { name = "uvicorn", extras = ["standard"], specifier = ">=0.34.0" }, @@ -6276,6 +6440,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6c/d0/179abca8b984b3deefd996f362b612c39da73b60f685921e6cd58b6125b4/timm-1.0.15-py3-none-any.whl", hash = "sha256:5a3dc460c24e322ecc7fd1f3e3eb112423ddee320cb059cc1956fbc9731748ef", size = 2361373 }, ] +[[package]] +name = "tld" +version = "0.13.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/df/a1/5723b07a70c1841a80afc9ac572fdf53488306848d844cd70519391b0d26/tld-0.13.1.tar.gz", hash = "sha256:75ec00936cbcf564f67361c41713363440b6c4ef0f0c1592b5b0fbe72c17a350", size = 462000, upload-time = "2025-05-21T22:18:29.341Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/dc/70/b2f38360c3fc4bc9b5e8ef429e1fde63749144ac583c2dbdf7e21e27a9ad/tld-0.13.1-py2.py3-none-any.whl", hash = "sha256:a2d35109433ac83486ddf87e3c4539ab2c5c2478230e5d9c060a18af4b03aa7c", size = 274718, upload-time = "2025-05-21T22:18:25.811Z" }, +] + [[package]] name = "tokenizers" version = "0.21.1" @@ -6399,6 +6572,24 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d0/30/dc54f88dd4a2b5dc8a0279bdd7270e735851848b762aeb1c1184ed1f6b14/tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2", size = 78540 }, ] +[[package]] +name = "trafilatura" +version = "2.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "charset-normalizer" }, + { name = "courlan" }, + { name = 
"htmldate" }, + { name = "justext" }, + { name = "lxml" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/06/25/e3ebeefdebfdfae8c4a4396f5a6ea51fc6fa0831d63ce338e5090a8003dc/trafilatura-2.0.0.tar.gz", hash = "sha256:ceb7094a6ecc97e72fea73c7dba36714c5c5b577b6470e4520dca893706d6247", size = 253404, upload-time = "2024-12-03T15:23:24.16Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8a/b6/097367f180b6383a3581ca1b86fcae284e52075fa941d1232df35293363c/trafilatura-2.0.0-py3-none-any.whl", hash = "sha256:77eb5d1e993747f6f20938e1de2d840020719735690c840b9a1024803a4cd51d", size = 132557, upload-time = "2024-12-03T15:23:21.41Z" }, +] + [[package]] name = "transformers" version = "4.52.4" @@ -6641,6 +6832,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5c/23/c7abc0ca0a1526a0774eca151daeb8de62ec457e77262b66b359c3c7679e/tzdata-2025.2-py2.py3-none-any.whl", hash = "sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8", size = 347839 }, ] +[[package]] +name = "tzlocal" +version = "5.3.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "tzdata", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/8b/2e/c14812d3d4d9cd1773c6be938f89e5735a1f11a9f184ac3639b93cef35d5/tzlocal-5.3.1.tar.gz", hash = "sha256:cceffc7edecefea1f595541dbd6e990cb1ea3d19bf01b2809f362a03dd7921fd", size = 30761, upload-time = "2025-03-05T21:17:41.549Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c2/14/e2a54fabd4f08cd7af1c07030603c3356b74da07f7cc056e600436edfa17/tzlocal-5.3.1-py3-none-any.whl", hash = "sha256:eb1a66c3ef5847adf7a834f1be0800581b683b5608e74f86ecbcef8ab91bb85d", size = 18026, upload-time = "2025-03-05T21:17:39.857Z" }, +] + [[package]] name = "unstructured" version = "0.17.2" @@ -6968,6 +7171,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/32/fa/a4f5c2046385492b2273213ef815bf71a0d4c1943b784fb904e184e30201/watchfiles-1.1.0-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:af06c863f152005c7592df1d6a7009c836a247c9d8adb78fef8575a5a98699db", size = 623315 }, ] +[[package]] +name = "wcmatch" +version = "10.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "bracex" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/79/3e/c0bdc27cf06f4e47680bd5803a07cb3dfd17de84cde92dd217dcb9e05253/wcmatch-10.1.tar.gz", hash = "sha256:f11f94208c8c8484a16f4f48638a85d771d9513f4ab3f37595978801cb9465af", size = 117421 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/eb/d8/0d1d2e9d3fabcf5d6840362adcf05f8cf3cd06a73358140c3a97189238ae/wcmatch-10.1-py3-none-any.whl", hash = "sha256:5848ace7dbb0476e5e55ab63c6bbd529745089343427caa5537f230cc01beb8a", size = 39854 }, +] + [[package]] name = "wcwidth" version = "0.2.14" diff --git a/surfsense_web/app/dashboard/[search_space_id]/client-layout.tsx b/surfsense_web/app/dashboard/[search_space_id]/client-layout.tsx index 82197921a..c0f5bf0b0 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/client-layout.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/client-layout.tsx @@ -9,6 +9,7 @@ import type React from "react"; import { useCallback, useEffect, useMemo, useState } from "react"; import { activeChathatUIAtom, activeChatIdAtom } from "@/atoms/chats/ui.atoms"; import { llmPreferencesAtom } from "@/atoms/llm-config/llm-config-query.atoms"; +import { myAccessAtom } from "@/atoms/members/members-query.atoms"; import { 
activeSearchSpaceIdAtom } from "@/atoms/search-spaces/search-space-query.atoms"; import { ChatPanelContainer } from "@/components/chat/ChatPanel/ChatPanelContainer"; import { DashboardBreadcrumb } from "@/components/dashboard-breadcrumb"; @@ -17,7 +18,6 @@ import { AppSidebarProvider } from "@/components/sidebar/AppSidebarProvider"; import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card"; import { Separator } from "@/components/ui/separator"; import { SidebarInset, SidebarProvider, SidebarTrigger } from "@/components/ui/sidebar"; -import { useUserAccess } from "@/hooks/use-rbac"; import { cn } from "@/lib/utils"; export function DashboardClientLayout({ @@ -69,7 +69,7 @@ export function DashboardClientLayout({ ); }, [preferences]); - const { access, loading: accessLoading } = useUserAccess(searchSpaceIdNum); + const { data: access = null, isLoading: accessLoading } = useAtomValue(myAccessAtom); const [hasCheckedOnboarding, setHasCheckedOnboarding] = useState(false); // Skip onboarding check if we're already on the onboarding page diff --git a/surfsense_web/app/dashboard/[search_space_id]/team/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/team/page.tsx index 5f9c4dbad..49e1de2ab 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/team/page.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/team/page.tsx @@ -46,6 +46,15 @@ import { motion } from "motion/react"; import { useParams, useRouter } from "next/navigation"; import { useCallback, useMemo, useState } from "react"; import { toast } from "sonner"; +import { + createInviteMutationAtom, + deleteInviteMutationAtom, +} from "@/atoms/invites/invites-mutation.atoms"; +import { + deleteMemberMutationAtom, + updateMemberMutationAtom, +} from "@/atoms/members/members-mutation.atoms"; +import { membersAtom, myAccessAtom } from "@/atoms/members/members-query.atoms"; import { permissionsAtom } from "@/atoms/permissions/permissions-query.atoms"; import { createRoleMutationAtom, @@ -107,20 +116,23 @@ import { } from "@/components/ui/table"; import { Tabs, TabsContent, TabsList, TabsTrigger } from "@/components/ui/tabs"; import { Textarea } from "@/components/ui/textarea"; +import type { + CreateInviteRequest, + DeleteInviteRequest, + Invite, +} from "@/contracts/types/invites.types"; +import type { + DeleteMembershipRequest, + Membership, + UpdateMembershipRequest, +} from "@/contracts/types/members.types"; import type { CreateRoleRequest, DeleteRoleRequest, Role, UpdateRoleRequest, } from "@/contracts/types/roles.types"; -import { - type Invite, - type InviteCreate, - type Member, - useInvites, - useMembers, - useUserAccess, -} from "@/hooks/use-rbac"; +import { invitesApiService } from "@/lib/apis/invites-api.service"; import { rolesApiService } from "@/lib/apis/roles-api.service"; import { cacheKeys } from "@/lib/query-client/cache-keys"; import { cn } from "@/lib/utils"; @@ -154,18 +166,54 @@ export default function TeamManagementPage() { const searchSpaceId = Number(params.search_space_id); const [activeTab, setActiveTab] = useState("members"); - const { access, loading: accessLoading, hasPermission } = useUserAccess(searchSpaceId); + const { data: access = null, isLoading: accessLoading } = useAtomValue(myAccessAtom); + + const hasPermission = useCallback( + (permission: string) => { + if (!access) return false; + if (access.is_owner) return true; + return access.permissions?.includes(permission) ?? 
false; + }, + [access] + ); + const { - members, - loading: membersLoading, - fetchMembers, - updateMemberRole, - removeMember, - } = useMembers(searchSpaceId); + data: members = [], + isLoading: membersLoading, + refetch: fetchMembers, + } = useAtomValue(membersAtom); const { mutateAsync: createRole } = useAtomValue(createRoleMutationAtom); const { mutateAsync: updateRole } = useAtomValue(updateRoleMutationAtom); const { mutateAsync: deleteRole } = useAtomValue(deleteRoleMutationAtom); + const { mutateAsync: updateMember } = useAtomValue(updateMemberMutationAtom); + + const { mutateAsync: deleteMember } = useAtomValue(deleteMemberMutationAtom); + const { mutateAsync: createInvite } = useAtomValue(createInviteMutationAtom); + const { mutateAsync: revokeInvite } = useAtomValue(deleteInviteMutationAtom); + + const handleRevokeInvite = useCallback( + async (inviteId: number): Promise => { + const request: DeleteInviteRequest = { + search_space_id: searchSpaceId, + invite_id: inviteId, + }; + await revokeInvite(request); + return true; + }, + [revokeInvite, searchSpaceId] + ); + + const handleCreateInvite = useCallback( + async (inviteData: CreateInviteRequest["data"]) => { + const request: CreateInviteRequest = { + search_space_id: searchSpaceId, + data: inviteData, + }; + return await createInvite(request); + }, + [createInvite, searchSpaceId] + ); const handleUpdateRole = useCallback( async (roleId: number, data: { permissions?: string[] }): Promise => { @@ -202,6 +250,32 @@ export default function TeamManagementPage() { [createRole, searchSpaceId] ); + const handleUpdateMember = useCallback( + async (membershipId: number, roleId: number | null): Promise => { + const request: UpdateMembershipRequest = { + search_space_id: searchSpaceId, + membership_id: membershipId, + data: { + role_id: roleId, + }, + }; + return (await updateMember(request)) as Membership; + }, + [updateMember, searchSpaceId] + ); + + const handleRemoveMember = useCallback( + async (membershipId: number) => { + const request: DeleteMembershipRequest = { + search_space_id: searchSpaceId, + membership_id: membershipId, + }; + await deleteMember(request); + + return true; + }, + [deleteMember, searchSpaceId] + ); const { data: roles = [], isLoading: rolesLoading, @@ -212,12 +286,14 @@ export default function TeamManagementPage() { enabled: !!searchSpaceId, }); const { - invites, - loading: invitesLoading, - fetchInvites, - createInvite, - revokeInvite, - } = useInvites(searchSpaceId); + data: invites = [], + isLoading: invitesLoading, + refetch: fetchInvites, + } = useQuery({ + queryKey: cacheKeys.invites.all(searchSpaceId.toString()), + queryFn: () => invitesApiService.getInvites({ search_space_id: searchSpaceId }), + staleTime: 5 * 60 * 1000, + }); const { data: permissionsData, isLoading: permissionsLoading } = useAtomValue(permissionsAtom); const permissions = permissionsData?.permissions || []; @@ -387,7 +463,7 @@ export default function TeamManagementPage() { {activeTab === "invites" && canInvite && ( )} @@ -404,8 +480,8 @@ export default function TeamManagementPage() { members={members} roles={roles} loading={membersLoading} - onUpdateRole={updateMemberRole} - onRemoveMember={removeMember} + onUpdateRole={handleUpdateMember} + onRemoveMember={handleRemoveMember} canManageRoles={hasPermission("members:manage_roles")} canRemove={hasPermission("members:remove")} /> @@ -427,7 +503,7 @@ export default function TeamManagementPage() { @@ -449,10 +525,10 @@ function MembersTab({ canManageRoles, canRemove, }: { - members: 
Member[]; + members: Membership[]; roles: Role[]; loading: boolean; - onUpdateRole: (membershipId: number, roleId: number | null) => Promise; + onUpdateRole: (membershipId: number, roleId: number | null) => Promise; onRemoveMember: (membershipId: number) => Promise; canManageRoles: boolean; canRemove: boolean; @@ -1016,7 +1092,7 @@ function CreateInviteDialog({ searchSpaceId, }: { roles: Role[]; - onCreateInvite: (data: InviteCreate) => Promise; + onCreateInvite: (data: CreateInviteRequest["data"]) => Promise; searchSpaceId: number; }) { const [open, setOpen] = useState(false); @@ -1031,7 +1107,7 @@ function CreateInviteDialog({ const handleCreate = async () => { setCreating(true); try { - const data: InviteCreate = {}; + const data: CreateInviteRequest["data"] = {}; if (name) data.name = name; if (roleId && roleId !== "default") data.role_id = Number(roleId); if (maxUses) data.max_uses = Number(maxUses); diff --git a/surfsense_web/app/invite/[invite_code]/page.tsx b/surfsense_web/app/invite/[invite_code]/page.tsx index 4ff78ac91..30e93c022 100644 --- a/surfsense_web/app/invite/[invite_code]/page.tsx +++ b/surfsense_web/app/invite/[invite_code]/page.tsx @@ -1,5 +1,7 @@ "use client"; +import { useQuery } from "@tanstack/react-query"; +import { useAtomValue } from "jotai"; import { AlertCircle, ArrowRight, @@ -16,7 +18,9 @@ import { motion } from "motion/react"; import Image from "next/image"; import Link from "next/link"; import { useParams, useRouter } from "next/navigation"; -import { use, useEffect, useState } from "react"; +import { use, useCallback, useEffect, useState } from "react"; +import { toast } from "sonner"; +import { acceptInviteMutationAtom } from "@/atoms/invites/invites-mutation.atoms"; import { Button } from "@/components/ui/button"; import { Card, @@ -26,22 +30,48 @@ import { CardHeader, CardTitle, } from "@/components/ui/card"; -import { useInviteInfo } from "@/hooks/use-rbac"; +import type { AcceptInviteResponse } from "@/contracts/types/invites.types"; +import { invitesApiService } from "@/lib/apis/invites-api.service"; import { getBearerToken } from "@/lib/auth-utils"; +import { cacheKeys } from "@/lib/query-client/cache-keys"; export default function InviteAcceptPage() { const params = useParams(); const router = useRouter(); const inviteCode = params.invite_code as string; - const { inviteInfo, loading, acceptInvite } = useInviteInfo(inviteCode); + const { data: inviteInfo = null, isLoading: loading } = useQuery({ + queryKey: cacheKeys.invites.info(inviteCode), + enabled: !!inviteCode, + staleTime: 5 * 60 * 1000, + queryFn: async () => { + if (!inviteCode) return null; + return invitesApiService.getInviteInfo({ + invite_code: inviteCode, + }); + }, + }); + + const { mutateAsync: acceptInviteMutation } = useAtomValue(acceptInviteMutationAtom); + + const acceptInvite = useCallback(async () => { + if (!inviteCode) { + toast.error("No invite code provided"); + return null; + } + + try { + const result = await acceptInviteMutation({ invite_code: inviteCode }); + return result; + } catch (err: any) { + toast.error(err.message || "Failed to accept invite"); + throw err; + } + }, [inviteCode, acceptInviteMutation]); + const [accepting, setAccepting] = useState(false); const [accepted, setAccepted] = useState(false); - const [acceptedData, setAcceptedData] = useState<{ - search_space_id: number; - search_space_name: string; - role_name: string; - } | null>(null); + const [acceptedData, setAcceptedData] = useState(null); const [error, setError] = useState(null); const 
[isLoggedIn, setIsLoggedIn] = useState(null); diff --git a/surfsense_web/atoms/invites/invites-mutation.atoms.ts b/surfsense_web/atoms/invites/invites-mutation.atoms.ts new file mode 100644 index 000000000..f2acb6e4a --- /dev/null +++ b/surfsense_web/atoms/invites/invites-mutation.atoms.ts @@ -0,0 +1,85 @@ +import { atomWithMutation } from "jotai-tanstack-query"; +import { toast } from "sonner"; +import type { + AcceptInviteRequest, + CreateInviteRequest, + DeleteInviteRequest, + UpdateInviteRequest, +} from "@/contracts/types/invites.types"; +import { invitesApiService } from "@/lib/apis/invites-api.service"; +import { cacheKeys } from "@/lib/query-client/cache-keys"; +import { queryClient } from "@/lib/query-client/client"; + +/** + * Mutation atom for creating an invite + */ +export const createInviteMutationAtom = atomWithMutation(() => ({ + mutationFn: async (request: CreateInviteRequest) => { + return invitesApiService.createInvite(request); + }, + onSuccess: (_, variables) => { + queryClient.invalidateQueries({ + queryKey: cacheKeys.invites.all(variables.search_space_id.toString()), + }); + toast.success("Invite created successfully"); + }, + onError: (error: Error) => { + console.error("Error creating invite:", error); + toast.error("Failed to create invite"); + }, +})); + +/** + * Mutation atom for updating an invite + */ +export const updateInviteMutationAtom = atomWithMutation(() => ({ + mutationFn: async (request: UpdateInviteRequest) => { + return invitesApiService.updateInvite(request); + }, + onSuccess: (_, variables) => { + queryClient.invalidateQueries({ + queryKey: cacheKeys.invites.all(variables.search_space_id.toString()), + }); + toast.success("Invite updated successfully"); + }, + onError: (error: Error) => { + console.error("Error updating invite:", error); + toast.error("Failed to update invite"); + }, +})); + +/** + * Mutation atom for deleting an invite + */ +export const deleteInviteMutationAtom = atomWithMutation(() => ({ + mutationFn: async (request: DeleteInviteRequest) => { + return invitesApiService.deleteInvite(request); + }, + onSuccess: (_, variables) => { + queryClient.invalidateQueries({ + queryKey: cacheKeys.invites.all(variables.search_space_id.toString()), + }); + toast.success("Invite deleted successfully"); + }, + onError: (error: Error) => { + console.error("Error deleting invite:", error); + toast.error("Failed to delete invite"); + }, +})); + +/** + * Mutation atom for accepting an invite + */ +export const acceptInviteMutationAtom = atomWithMutation(() => ({ + mutationFn: async (request: AcceptInviteRequest) => { + return invitesApiService.acceptInvite(request); + }, + onSuccess: () => { + queryClient.invalidateQueries({ queryKey: cacheKeys.searchSpaces.all }); + toast.success("Invite accepted successfully"); + }, + onError: (error: Error) => { + console.error("Error accepting invite:", error); + toast.error("Failed to accept invite"); + }, +})); diff --git a/surfsense_web/atoms/invites/invites-query.atoms.ts b/surfsense_web/atoms/invites/invites-query.atoms.ts new file mode 100644 index 000000000..db1aa70a0 --- /dev/null +++ b/surfsense_web/atoms/invites/invites-query.atoms.ts @@ -0,0 +1,22 @@ +import { atomWithQuery } from "jotai-tanstack-query"; +import { activeSearchSpaceIdAtom } from "@/atoms/search-spaces/search-space-query.atoms"; +import { invitesApiService } from "@/lib/apis/invites-api.service"; +import { cacheKeys } from "@/lib/query-client/cache-keys"; + +export const invitesAtom = atomWithQuery((get) => { + const searchSpaceId = 
get(activeSearchSpaceIdAtom); + + return { + queryKey: cacheKeys.invites.all(searchSpaceId?.toString() ?? ""), + enabled: !!searchSpaceId, + staleTime: 5 * 60 * 1000, // 5 minutes + queryFn: async () => { + if (!searchSpaceId) { + return []; + } + return invitesApiService.getInvites({ + search_space_id: Number(searchSpaceId), + }); + }, + }; +}); diff --git a/surfsense_web/atoms/members/members-mutation.atoms.ts b/surfsense_web/atoms/members/members-mutation.atoms.ts new file mode 100644 index 000000000..d01d3b489 --- /dev/null +++ b/surfsense_web/atoms/members/members-mutation.atoms.ts @@ -0,0 +1,64 @@ +import { atomWithMutation } from "jotai-tanstack-query"; +import { toast } from "sonner"; +import type { + DeleteMembershipRequest, + DeleteMembershipResponse, + LeaveSearchSpaceRequest, + LeaveSearchSpaceResponse, + UpdateMembershipRequest, + UpdateMembershipResponse, +} from "@/contracts/types/members.types"; +import { membersApiService } from "@/lib/apis/members-api.service"; +import { cacheKeys } from "@/lib/query-client/cache-keys"; +import { queryClient } from "@/lib/query-client/client"; + +export const updateMemberMutationAtom = atomWithMutation(() => { + return { + mutationFn: async (request: UpdateMembershipRequest) => { + return membersApiService.updateMember(request); + }, + onSuccess: (_: UpdateMembershipResponse, request: UpdateMembershipRequest) => { + toast.success("Member updated successfully"); + queryClient.invalidateQueries({ + queryKey: cacheKeys.members.all(request.search_space_id.toString()), + }); + }, + onError: () => { + toast.error("Failed to update member"); + }, + }; +}); + +export const deleteMemberMutationAtom = atomWithMutation(() => { + return { + mutationFn: async (request: DeleteMembershipRequest) => { + return membersApiService.deleteMember(request); + }, + onSuccess: (_: DeleteMembershipResponse, request: DeleteMembershipRequest) => { + toast.success("Member removed successfully"); + queryClient.invalidateQueries({ + queryKey: cacheKeys.members.all(request.search_space_id.toString()), + }); + }, + onError: () => { + toast.error("Failed to remove member"); + }, + }; +}); + +export const leaveSearchSpaceMutationAtom = atomWithMutation(() => { + return { + mutationFn: async (request: LeaveSearchSpaceRequest) => { + return membersApiService.leaveSearchSpace(request); + }, + onSuccess: (_: LeaveSearchSpaceResponse, request: LeaveSearchSpaceRequest) => { + toast.success("Successfully left the search space"); + queryClient.invalidateQueries({ + queryKey: cacheKeys.members.all(request.search_space_id.toString()), + }); + }, + onError: () => { + toast.error("Failed to leave search space"); + }, + }; +}); diff --git a/surfsense_web/atoms/members/members-query.atoms.ts b/surfsense_web/atoms/members/members-query.atoms.ts new file mode 100644 index 000000000..8ed56ef0c --- /dev/null +++ b/surfsense_web/atoms/members/members-query.atoms.ts @@ -0,0 +1,40 @@ +import { atomWithQuery } from "jotai-tanstack-query"; +import { activeSearchSpaceIdAtom } from "@/atoms/search-spaces/search-space-query.atoms"; +import { membersApiService } from "@/lib/apis/members-api.service"; +import { cacheKeys } from "@/lib/query-client/cache-keys"; + +export const membersAtom = atomWithQuery((get) => { + const searchSpaceId = get(activeSearchSpaceIdAtom); + + return { + queryKey: cacheKeys.members.all(searchSpaceId?.toString() ?? 
""), + enabled: !!searchSpaceId, + staleTime: 5 * 60 * 1000, // 5 minutes + queryFn: async () => { + if (!searchSpaceId) { + return []; + } + return membersApiService.getMembers({ + search_space_id: Number(searchSpaceId), + }); + }, + }; +}); + +export const myAccessAtom = atomWithQuery((get) => { + const searchSpaceId = get(activeSearchSpaceIdAtom); + + return { + queryKey: cacheKeys.members.myAccess(searchSpaceId?.toString() ?? ""), + enabled: !!searchSpaceId, + staleTime: 5 * 60 * 1000, // 5 minutes + queryFn: async () => { + if (!searchSpaceId) { + return null; + } + return membersApiService.getMyAccess({ + search_space_id: Number(searchSpaceId), + }); + }, + }; +}); diff --git a/surfsense_web/contracts/types/invites.types.ts b/surfsense_web/contracts/types/invites.types.ts index 2a9460e53..0359d84d5 100644 --- a/surfsense_web/contracts/types/invites.types.ts +++ b/surfsense_web/contracts/types/invites.types.ts @@ -77,11 +77,10 @@ export const getInviteInfoRequest = z.object({ }); export const getInviteInfoResponse = z.object({ - invite_code: z.string(), search_space_name: z.string(), role_name: z.string().nullable(), - expires_at: z.string().nullable(), is_valid: z.boolean(), + message: z.string().nullable(), }); /** @@ -94,6 +93,8 @@ export const acceptInviteRequest = z.object({ export const acceptInviteResponse = z.object({ message: z.string(), search_space_id: z.number(), + search_space_name: z.string(), + role_name: z.string().nullable(), }); export type Invite = z.infer; diff --git a/surfsense_web/contracts/types/members.types.ts b/surfsense_web/contracts/types/members.types.ts index a6d6333ac..d20109b96 100644 --- a/surfsense_web/contracts/types/members.types.ts +++ b/surfsense_web/contracts/types/members.types.ts @@ -30,7 +30,7 @@ export const updateMembershipRequest = z.object({ search_space_id: z.number(), membership_id: z.number(), data: z.object({ - role_id: z.number(), + role_id: z.number().nullable(), }), }); @@ -67,7 +67,7 @@ export const getMyAccessRequest = z.object({ }); export const getMyAccessResponse = z.object({ - user_id: z.string(), + search_space_name: z.string(), search_space_id: z.number(), is_owner: z.boolean(), permissions: z.array(z.string()), diff --git a/surfsense_web/hooks/index.ts b/surfsense_web/hooks/index.ts index db454c161..60afebc45 100644 --- a/surfsense_web/hooks/index.ts +++ b/surfsense_web/hooks/index.ts @@ -1,4 +1,3 @@ export * from "./use-debounced-value"; export * from "./use-logs"; -export * from "./use-rbac"; export * from "./use-search-source-connectors"; diff --git a/surfsense_web/hooks/use-rbac.ts b/surfsense_web/hooks/use-rbac.ts deleted file mode 100644 index fa619407a..000000000 --- a/surfsense_web/hooks/use-rbac.ts +++ /dev/null @@ -1,499 +0,0 @@ -"use client"; - -import { useCallback, useEffect, useMemo, useState } from "react"; -import { toast } from "sonner"; -import { authenticatedFetch, getBearerToken, handleUnauthorized } from "@/lib/auth-utils"; - -// ============ Types ============ - -export interface Role { - id: number; - name: string; - description: string | null; - permissions: string[]; - is_default: boolean; - is_system_role: boolean; - search_space_id: number; - created_at: string; -} - -export interface Member { - id: number; - user_id: string; - search_space_id: number; - role_id: number | null; - is_owner: boolean; - joined_at: string; - created_at: string; - role: Role | null; - user_email: string | null; -} - -export interface Invite { - id: number; - invite_code: string; - search_space_id: number; - role_id: 
diff --git a/surfsense_web/atoms/members/members-query.atoms.ts b/surfsense_web/atoms/members/members-query.atoms.ts
new file mode 100644
index 000000000..8ed56ef0c
--- /dev/null
+++ b/surfsense_web/atoms/members/members-query.atoms.ts
@@ -0,0 +1,40 @@
+import { atomWithQuery } from "jotai-tanstack-query";
+import { activeSearchSpaceIdAtom } from "@/atoms/search-spaces/search-space-query.atoms";
+import { membersApiService } from "@/lib/apis/members-api.service";
+import { cacheKeys } from "@/lib/query-client/cache-keys";
+
+export const membersAtom = atomWithQuery((get) => {
+	const searchSpaceId = get(activeSearchSpaceIdAtom);
+
+	return {
+		queryKey: cacheKeys.members.all(searchSpaceId?.toString() ?? ""),
+		enabled: !!searchSpaceId,
+		staleTime: 5 * 60 * 1000, // 5 minutes
+		queryFn: async () => {
+			if (!searchSpaceId) {
+				return [];
+			}
+			return membersApiService.getMembers({
+				search_space_id: Number(searchSpaceId),
+			});
+		},
+	};
+});
+
+export const myAccessAtom = atomWithQuery((get) => {
+	const searchSpaceId = get(activeSearchSpaceIdAtom);
+
+	return {
+		queryKey: cacheKeys.members.myAccess(searchSpaceId?.toString() ?? ""),
+		enabled: !!searchSpaceId,
+		staleTime: 5 * 60 * 1000, // 5 minutes
+		queryFn: async () => {
+			if (!searchSpaceId) {
+				return null;
+			}
+			return membersApiService.getMyAccess({
+				search_space_id: Number(searchSpaceId),
+			});
+		},
+	};
+});
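
myAccessAtom supersedes the useUserAccess hook deleted later in this diff. That hook treated "*" in the permissions array as owner/full access; the same check can be rebuilt as a derived atom. A sketch, not part of the patch:

import { atom } from "jotai";
import { myAccessAtom } from "@/atoms/members/members-query.atoms";

// Hypothetical helper: returns a hasPermission() checker with the old
// wildcard semantics ("*" grants every permission).
export const hasPermissionAtom = atom((get) => {
	const { data: access } = get(myAccessAtom);
	return (permission: string): boolean => {
		if (!access) return false;
		if (access.permissions.includes("*")) return true;
		return access.permissions.includes(permission);
	};
});
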
diff --git a/surfsense_web/contracts/types/invites.types.ts b/surfsense_web/contracts/types/invites.types.ts
index 2a9460e53..0359d84d5 100644
--- a/surfsense_web/contracts/types/invites.types.ts
+++ b/surfsense_web/contracts/types/invites.types.ts
@@ -77,11 +77,10 @@ export const getInviteInfoRequest = z.object({
 });
 
 export const getInviteInfoResponse = z.object({
-	invite_code: z.string(),
 	search_space_name: z.string(),
 	role_name: z.string().nullable(),
-	expires_at: z.string().nullable(),
 	is_valid: z.boolean(),
+	message: z.string().nullable(),
 });
 
 /**
@@ -94,6 +93,8 @@ export const acceptInviteRequest = z.object({
 });
 
 export const acceptInviteResponse = z.object({
 	message: z.string(),
 	search_space_id: z.number(),
+	search_space_name: z.string(),
+	role_name: z.string().nullable(),
 });
 
 export type Invite = z.infer;
diff --git a/surfsense_web/contracts/types/members.types.ts b/surfsense_web/contracts/types/members.types.ts
index a6d6333ac..d20109b96 100644
--- a/surfsense_web/contracts/types/members.types.ts
+++ b/surfsense_web/contracts/types/members.types.ts
@@ -30,7 +30,7 @@ export const updateMembershipRequest = z.object({
 	search_space_id: z.number(),
 	membership_id: z.number(),
 	data: z.object({
-		role_id: z.number(),
+		role_id: z.number().nullable(),
 	}),
 });
 
@@ -67,7 +67,7 @@ export const getMyAccessRequest = z.object({
 });
 
 export const getMyAccessResponse = z.object({
-	user_id: z.string(),
+	search_space_name: z.string(),
 	search_space_id: z.number(),
 	is_owner: z.boolean(),
 	permissions: z.array(z.string()),
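
Two contract changes above are worth calling out: updateMembershipRequest now accepts role_id: null (clearing a member's role becomes a valid update), and getMyAccessResponse trades user_id for search_space_name. A quick illustration of the first change (sketch only):

import { updateMembershipRequest } from "@/contracts/types/members.types";

// role_id: null now validates; the previous z.number() schema rejected it.
const parsed = updateMembershipRequest.safeParse({
	search_space_id: 1,
	membership_id: 42,
	data: { role_id: null },
});
console.log(parsed.success); // true
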
diff --git a/surfsense_web/hooks/index.ts b/surfsense_web/hooks/index.ts
index db454c161..60afebc45 100644
--- a/surfsense_web/hooks/index.ts
+++ b/surfsense_web/hooks/index.ts
@@ -1,4 +1,3 @@
 export * from "./use-debounced-value";
 export * from "./use-logs";
-export * from "./use-rbac";
 export * from "./use-search-source-connectors";
diff --git a/surfsense_web/hooks/use-rbac.ts b/surfsense_web/hooks/use-rbac.ts
deleted file mode 100644
index fa619407a..000000000
--- a/surfsense_web/hooks/use-rbac.ts
+++ /dev/null
@@ -1,499 +0,0 @@
-"use client";
-
-import { useCallback, useEffect, useMemo, useState } from "react";
-import { toast } from "sonner";
-import { authenticatedFetch, getBearerToken, handleUnauthorized } from "@/lib/auth-utils";
-
-// ============ Types ============
-
-export interface Role {
-	id: number;
-	name: string;
-	description: string | null;
-	permissions: string[];
-	is_default: boolean;
-	is_system_role: boolean;
-	search_space_id: number;
-	created_at: string;
-}
-
-export interface Member {
-	id: number;
-	user_id: string;
-	search_space_id: number;
-	role_id: number | null;
-	is_owner: boolean;
-	joined_at: string;
-	created_at: string;
-	role: Role | null;
-	user_email: string | null;
-}
-
-export interface Invite {
-	id: number;
-	invite_code: string;
-	search_space_id: number;
-	role_id: number | null;
-	created_by_id: string | null;
-	expires_at: string | null;
-	max_uses: number | null;
-	uses_count: number;
-	is_active: boolean;
-	name: string | null;
-	created_at: string;
-	role: Role | null;
-}
-
-export interface InviteCreate {
-	name?: string;
-	role_id?: number;
-	expires_at?: string;
-	max_uses?: number;
-}
-
-export interface InviteUpdate {
-	name?: string;
-	role_id?: number;
-	expires_at?: string;
-	max_uses?: number;
-	is_active?: boolean;
-}
-
-export interface RoleCreate {
-	name: string;
-	description?: string;
-	permissions: string[];
-	is_default?: boolean;
-}
-
-export interface RoleUpdate {
-	name?: string;
-	description?: string;
-	permissions?: string[];
-	is_default?: boolean;
-}
-
-export interface PermissionInfo {
-	value: string;
-	name: string;
-	category: string;
-}
-
-export interface UserAccess {
-	search_space_id: number;
-	search_space_name: string;
-	is_owner: boolean;
-	role_name: string | null;
-	permissions: string[];
-}
-
-export interface InviteInfo {
-	search_space_name: string;
-	role_name: string | null;
-	is_valid: boolean;
-	message: string | null;
-}
-
-// ============ Members Hook ============
-
-export function useMembers(searchSpaceId: number) {
-	const [members, setMembers] = useState<Member[]>([]);
-	const [loading, setLoading] = useState(true);
-	const [error, setError] = useState<string | null>(null);
-
-	const fetchMembers = useCallback(async () => {
-		if (!searchSpaceId) return;
-
-		try {
-			setLoading(true);
-			const response = await authenticatedFetch(
-				`${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/searchspaces/${searchSpaceId}/members`,
-				{ method: "GET" }
-			);
-
-			if (!response.ok) {
-				const errorData = await response.json().catch(() => ({}));
-				throw new Error(errorData.detail || "Failed to fetch members");
-			}
-
-			const data = await response.json();
-			setMembers(data);
-			setError(null);
-			return data;
-		} catch (err: any) {
-			setError(err.message || "Failed to fetch members");
-			console.error("Error fetching members:", err);
-		} finally {
-			setLoading(false);
-		}
-	}, [searchSpaceId]);
-
-	useEffect(() => {
-		fetchMembers();
-	}, [fetchMembers]);
-
-	const updateMemberRole = useCallback(
-		async (membershipId: number, roleId: number | null) => {
-			try {
-				const response = await authenticatedFetch(
-					`${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/searchspaces/${searchSpaceId}/members/${membershipId}`,
-					{
-						headers: { "Content-Type": "application/json" },
-						method: "PUT",
-						body: JSON.stringify({ role_id: roleId }),
-					}
-				);
-
-				if (!response.ok) {
-					const errorData = await response.json().catch(() => ({}));
-					throw new Error(errorData.detail || "Failed to update member role");
-				}
-
-				const updatedMember = await response.json();
-				setMembers((prev) => prev.map((m) => (m.id === membershipId ? updatedMember : m)));
-				toast.success("Member role updated successfully");
-				return updatedMember;
-			} catch (err: any) {
-				toast.error(err.message || "Failed to update member role");
-				throw err;
-			}
-		},
-		[searchSpaceId]
-	);
-
-	const removeMember = useCallback(
-		async (membershipId: number) => {
-			try {
-				const response = await authenticatedFetch(
-					`${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/searchspaces/${searchSpaceId}/members/${membershipId}`,
-					{ method: "DELETE" }
-				);
-
-				if (!response.ok) {
-					const errorData = await response.json().catch(() => ({}));
-					throw new Error(errorData.detail || "Failed to remove member");
-				}
-
-				setMembers((prev) => prev.filter((m) => m.id !== membershipId));
-				toast.success("Member removed successfully");
-				return true;
-			} catch (err: any) {
-				toast.error(err.message || "Failed to remove member");
-				return false;
-			}
-		},
-		[searchSpaceId]
-	);
-
-	const leaveSearchSpace = useCallback(async () => {
-		try {
-			const response = await authenticatedFetch(
-				`${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/searchspaces/${searchSpaceId}/members/me`,
-				{ method: "DELETE" }
-			);
-
-			if (!response.ok) {
-				const errorData = await response.json().catch(() => ({}));
-				throw new Error(errorData.detail || "Failed to leave search space");
-			}
-
-			toast.success("Successfully left the search space");
-			return true;
-		} catch (err: any) {
-			toast.error(err.message || "Failed to leave search space");
-			return false;
-		}
-	}, [searchSpaceId]);
-
-	return {
-		members,
-		loading,
-		error,
-		fetchMembers,
-		updateMemberRole,
-		removeMember,
-		leaveSearchSpace,
-	};
-}
-
-// ============ Roles Hook ============
-
-export function useInvites(searchSpaceId: number) {
-	const [invites, setInvites] = useState<Invite[]>([]);
-	const [loading, setLoading] = useState(true);
-	const [error, setError] = useState<string | null>(null);
-
-	const fetchInvites = useCallback(async () => {
-		if (!searchSpaceId) return;
-
-		try {
-			setLoading(true);
-			const response = await authenticatedFetch(
-				`${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/searchspaces/${searchSpaceId}/invites`,
-				{ method: "GET" }
-			);
-
-			if (!response.ok) {
-				const errorData = await response.json().catch(() => ({}));
-				throw new Error(errorData.detail || "Failed to fetch invites");
-			}
-
-			const data = await response.json();
-			setInvites(data);
-			setError(null);
-			return data;
-		} catch (err: any) {
-			setError(err.message || "Failed to fetch invites");
-			console.error("Error fetching invites:", err);
-		} finally {
-			setLoading(false);
-		}
-	}, [searchSpaceId]);
-
-	useEffect(() => {
-		fetchInvites();
-	}, [fetchInvites]);
-
-	const createInvite = useCallback(
-		async (inviteData: InviteCreate) => {
-			try {
-				const response = await authenticatedFetch(
-					`${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/searchspaces/${searchSpaceId}/invites`,
-					{
-						headers: { "Content-Type": "application/json" },
-						method: "POST",
-						body: JSON.stringify(inviteData),
-					}
-				);
-
-				if (!response.ok) {
-					const errorData = await response.json().catch(() => ({}));
-					throw new Error(errorData.detail || "Failed to create invite");
-				}
-
-				const newInvite = await response.json();
-				setInvites((prev) => [...prev, newInvite]);
-				toast.success("Invite created successfully");
-				return newInvite;
-			} catch (err: any) {
-				toast.error(err.message || "Failed to create invite");
-				throw err;
-			}
-		},
-		[searchSpaceId]
-	);
-
-	const updateInvite = useCallback(
-		async (inviteId: number, inviteData: InviteUpdate) => {
-			try {
-				const response = await authenticatedFetch(
-					`${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/searchspaces/${searchSpaceId}/invites/${inviteId}`,
-					{
-						headers: { "Content-Type": "application/json" },
-						method: "PUT",
-						body: JSON.stringify(inviteData),
-					}
-				);
-
-				if (!response.ok) {
-					const errorData = await response.json().catch(() => ({}));
-					throw new Error(errorData.detail || "Failed to update invite");
-				}
-
-				const updatedInvite = await response.json();
-				setInvites((prev) => prev.map((i) => (i.id === inviteId ? updatedInvite : i)));
-				toast.success("Invite updated successfully");
-				return updatedInvite;
-			} catch (err: any) {
-				toast.error(err.message || "Failed to update invite");
-				throw err;
-			}
-		},
-		[searchSpaceId]
-	);
-
-	const revokeInvite = useCallback(
-		async (inviteId: number) => {
-			try {
-				const response = await authenticatedFetch(
-					`${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/searchspaces/${searchSpaceId}/invites/${inviteId}`,
-					{ method: "DELETE" }
-				);
-
-				if (!response.ok) {
-					const errorData = await response.json().catch(() => ({}));
-					throw new Error(errorData.detail || "Failed to revoke invite");
-				}
-
-				setInvites((prev) => prev.filter((i) => i.id !== inviteId));
-				toast.success("Invite revoked successfully");
-				return true;
-			} catch (err: any) {
-				toast.error(err.message || "Failed to revoke invite");
-				return false;
-			}
-		},
-		[searchSpaceId]
-	);
-
-	return {
-		invites,
-		loading,
-		error,
-		fetchInvites,
-		createInvite,
-		updateInvite,
-		revokeInvite,
-	};
-}
-
-// ============ Permissions Hook ============
-
-export function useUserAccess(searchSpaceId: number) {
-	const [access, setAccess] = useState<UserAccess | null>(null);
-	const [loading, setLoading] = useState(true);
-	const [error, setError] = useState<string | null>(null);
-
-	const fetchAccess = useCallback(async () => {
-		if (!searchSpaceId) return;
-
-		try {
-			setLoading(true);
-			const response = await authenticatedFetch(
-				`${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/searchspaces/${searchSpaceId}/my-access`,
-				{ method: "GET" }
-			);
-
-			if (!response.ok) {
-				const errorData = await response.json().catch(() => ({}));
-				throw new Error(errorData.detail || "Failed to fetch access info");
-			}
-
-			const data = await response.json();
-			setAccess(data);
-			setError(null);
-			return data;
-		} catch (err: any) {
-			setError(err.message || "Failed to fetch access info");
-			console.error("Error fetching access:", err);
-		} finally {
-			setLoading(false);
-		}
-	}, [searchSpaceId]);
-
-	useEffect(() => {
-		fetchAccess();
-	}, [fetchAccess]);
-
-	// Helper function to check if user has a specific permission
-	const hasPermission = useCallback(
-		(permission: string) => {
-			if (!access) return false;
-			// Owner/full access check
-			if (access.permissions.includes("*")) return true;
-			return access.permissions.includes(permission);
-		},
-		[access]
-	);
-
-	// Helper function to check if user has any of the given permissions
-	const hasAnyPermission = useCallback(
-		(permissions: string[]) => {
-			if (!access) return false;
-			if (access.permissions.includes("*")) return true;
-			return permissions.some((p) => access.permissions.includes(p));
-		},
-		[access]
-	);
-
-	return {
-		access,
-		loading,
-		error,
-		fetchAccess,
-		hasPermission,
-		hasAnyPermission,
-	};
-}
-
-// ============ Invite Info Hook (Public) ============
-
-export function useInviteInfo(inviteCode: string | null) {
-	const [inviteInfo, setInviteInfo] = useState<InviteInfo | null>(null);
-	const [loading, setLoading] = useState(true);
-	const [error, setError] = useState<string | null>(null);
-
-	const fetchInviteInfo = useCallback(async () => {
-		if (!inviteCode) {
-			setLoading(false);
-			return;
-		}
-
-		try {
-			setLoading(true);
-			const response = await fetch(
-				`${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/invites/${inviteCode}/info`,
-				{
-					method: "GET",
-				}
-			);
-
-			if (!response.ok) {
-				const errorData = await response.json().catch(() => ({}));
-				throw new Error(errorData.detail || "Failed to fetch invite info");
-			}
-
-			const data = await response.json();
-			setInviteInfo(data);
-			setError(null);
-			return data;
-		} catch (err: any) {
-			setError(err.message || "Failed to fetch invite info");
-			console.error("Error fetching invite info:", err);
-		} finally {
-			setLoading(false);
-		}
-	}, [inviteCode]);
-
-	useEffect(() => {
-		fetchInviteInfo();
-	}, [fetchInviteInfo]);
-
-	const acceptInvite = useCallback(async () => {
-		if (!inviteCode) {
-			toast.error("No invite code provided");
-			return null;
-		}
-
-		try {
-			const response = await authenticatedFetch(
-				`${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/invites/accept`,
-				{
-					headers: { "Content-Type": "application/json" },
-					method: "POST",
-					body: JSON.stringify({ invite_code: inviteCode }),
-				}
-			);
-
-			if (!response.ok) {
-				const errorData = await response.json().catch(() => ({}));
-				throw new Error(errorData.detail || "Failed to accept invite");
-			}
-
-			const data = await response.json();
-			toast.success(data.message || "Successfully joined the search space");
-			return data;
-		} catch (err: any) {
-			toast.error(err.message || "Failed to accept invite");
-			throw err;
-		}
-	}, [inviteCode]);
-
-	return {
-		inviteInfo,
-		loading,
-		error,
-		fetchInviteInfo,
-		acceptInvite,
-	};
-}
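
Net effect of this deletion: the 499-line hook bundle is replaced by query/mutation atoms, zod contracts, and thin API services, with toasts and cache invalidation moving into the mutation atoms' callbacks. A rough mapping (the invites atoms appear earlier in the diff; atom names outside this section are assumptions):

// useMembers(id)      -> membersAtom + updateMemberMutationAtom,
//                        deleteMemberMutationAtom, leaveSearchSpaceMutationAtom
// useInvites(id)      -> invites atoms + invitesApiService (next file)
// useUserAccess(id)   -> myAccessAtom, with permission checks as derived atoms
// useInviteInfo(code) -> invitesApiService.getInviteInfo / acceptInvite
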
diff --git a/surfsense_web/lib/apis/invites-api.service.ts b/surfsense_web/lib/apis/invites-api.service.ts
new file mode 100644
index 000000000..b5954e5f6
--- /dev/null
+++ b/surfsense_web/lib/apis/invites-api.service.ts
@@ -0,0 +1,151 @@
+import {
+	type AcceptInviteRequest,
+	type AcceptInviteResponse,
+	acceptInviteRequest,
+	acceptInviteResponse,
+	type CreateInviteRequest,
+	type CreateInviteResponse,
+	createInviteRequest,
+	createInviteResponse,
+	type DeleteInviteRequest,
+	type DeleteInviteResponse,
+	deleteInviteRequest,
+	deleteInviteResponse,
+	type GetInviteInfoRequest,
+	type GetInviteInfoResponse,
+	type GetInvitesRequest,
+	type GetInvitesResponse,
+	getInviteInfoRequest,
+	getInviteInfoResponse,
+	getInvitesRequest,
+	getInvitesResponse,
+	type UpdateInviteRequest,
+	type UpdateInviteResponse,
+	updateInviteRequest,
+	updateInviteResponse,
+} from "@/contracts/types/invites.types";
+import { ValidationError } from "@/lib/error";
+import { baseApiService } from "./base-api.service";
+
+class InvitesApiService {
+	/**
+	 * Create a new invite
+	 */
+	createInvite = async (request: CreateInviteRequest) => {
+		const parsedRequest = createInviteRequest.safeParse(request);
+
+		if (!parsedRequest.success) {
+			console.error("Invalid request:", parsedRequest.error);
+
+			const errorMessage = parsedRequest.error.errors.map((err) => err.message).join(", ");
+			throw new ValidationError(`Invalid request: ${errorMessage}`);
+		}
+
+		return baseApiService.post(
+			`/api/v1/searchspaces/${parsedRequest.data.search_space_id}/invites`,
+			createInviteResponse,
+			{
+				body: parsedRequest.data.data,
+			}
+		);
+	};
+
+	/**
+	 * Get all invites for a search space
+	 */
+	getInvites = async (request: GetInvitesRequest) => {
+		const parsedRequest = getInvitesRequest.safeParse(request);
+
+		if (!parsedRequest.success) {
+			console.error("Invalid request:", parsedRequest.error);
+
+			const errorMessage = parsedRequest.error.errors.map((err) => err.message).join(", ");
+			throw new ValidationError(`Invalid request: ${errorMessage}`);
+		}
+
+		return baseApiService.get(
+			`/api/v1/searchspaces/${parsedRequest.data.search_space_id}/invites`,
+			getInvitesResponse
+		);
+	};
+
+	/**
+	 * Update an invite
+	 */
+	updateInvite = async (request: UpdateInviteRequest) => {
+		const parsedRequest = updateInviteRequest.safeParse(request);
+
+		if (!parsedRequest.success) {
+			console.error("Invalid request:", parsedRequest.error);
+
+			const errorMessage = parsedRequest.error.errors.map((err) => err.message).join(", ");
+			throw new ValidationError(`Invalid request: ${errorMessage}`);
+		}
+
+		return baseApiService.put(
+			`/api/v1/searchspaces/${parsedRequest.data.search_space_id}/invites/${parsedRequest.data.invite_id}`,
+			updateInviteResponse,
+			{
+				body: parsedRequest.data.data,
+			}
+		);
+	};
+
+	/**
+	 * Delete an invite
+	 */
+	deleteInvite = async (request: DeleteInviteRequest) => {
+		const parsedRequest = deleteInviteRequest.safeParse(request);
+
+		if (!parsedRequest.success) {
+			console.error("Invalid request:", parsedRequest.error);
+
+			const errorMessage = parsedRequest.error.errors.map((err) => err.message).join(", ");
+			throw new ValidationError(`Invalid request: ${errorMessage}`);
+		}
+
+		return baseApiService.delete(
+			`/api/v1/searchspaces/${parsedRequest.data.search_space_id}/invites/${parsedRequest.data.invite_id}`,
+			deleteInviteResponse
+		);
+	};
+
+	/**
+	 * Get invite info by invite code
+	 */
+	getInviteInfo = async (request: GetInviteInfoRequest) => {
+		const parsedRequest = getInviteInfoRequest.safeParse(request);
+
+		if (!parsedRequest.success) {
+			console.error("Invalid request:", parsedRequest.error);
+
+			const errorMessage = parsedRequest.error.errors.map((err) => err.message).join(", ");
+			throw new ValidationError(`Invalid request: ${errorMessage}`);
+		}
+
+		return baseApiService.get(
+			`/api/v1/invites/${parsedRequest.data.invite_code}/info`,
+			getInviteInfoResponse
+		);
+	};
+
+	/**
+	 * Accept an invite
+	 */
+	acceptInvite = async (request: AcceptInviteRequest) => {
+		const parsedRequest = acceptInviteRequest.safeParse(request);
+
+		if (!parsedRequest.success) {
+			console.error("Invalid request:", parsedRequest.error);
+
+			const errorMessage = parsedRequest.error.errors.map((err) => err.message).join(", ");
+			throw new ValidationError(`Invalid request: ${errorMessage}`);
+		}
+
+		return baseApiService.post(`/api/v1/invites/accept`, acceptInviteResponse, {
+			body: parsedRequest.data,
+		});
+	};
+}
+
+export const invitesApiService = new InvitesApiService();
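
With this service, the public invite page no longer needs hook state; the whole flow is two awaited calls. A sketch under the updated schemas (function name hypothetical; getInviteInfo is the unauthenticated endpoint while acceptInvite requires a logged-in user, matching the fetch vs. authenticatedFetch split in the deleted hook):

import { invitesApiService } from "@/lib/apis/invites-api.service";

export async function acceptInviteFlow(inviteCode: string) {
	// Validity check against the public info endpoint.
	const info = await invitesApiService.getInviteInfo({ invite_code: inviteCode });
	if (!info.is_valid) {
		throw new Error(info.message ?? "Invite is no longer valid");
	}

	// The response now carries search_space_name and role_name per the
	// acceptInviteResponse change above.
	return invitesApiService.acceptInvite({ invite_code: inviteCode });
}
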
diff --git a/surfsense_web/lib/apis/members-api.service.ts b/surfsense_web/lib/apis/members-api.service.ts
new file mode 100644
index 000000000..1dc39ad3b
--- /dev/null
+++ b/surfsense_web/lib/apis/members-api.service.ts
@@ -0,0 +1,126 @@
+import {
+	type DeleteMembershipRequest,
+	type DeleteMembershipResponse,
+	deleteMembershipRequest,
+	deleteMembershipResponse,
+	type GetMembersRequest,
+	type GetMembersResponse,
+	type GetMyAccessRequest,
+	type GetMyAccessResponse,
+	getMembersRequest,
+	getMembersResponse,
+	getMyAccessRequest,
+	getMyAccessResponse,
+	type LeaveSearchSpaceRequest,
+	type LeaveSearchSpaceResponse,
+	leaveSearchSpaceRequest,
+	leaveSearchSpaceResponse,
+	type UpdateMembershipRequest,
+	type UpdateMembershipResponse,
+	updateMembershipRequest,
+	updateMembershipResponse,
+} from "@/contracts/types/members.types";
+import { ValidationError } from "@/lib/error";
+import { baseApiService } from "./base-api.service";
+
+class MembersApiService {
+	/**
+	 * Get members of a search space
+	 */
+	getMembers = async (request: GetMembersRequest) => {
+		const parsedRequest = getMembersRequest.safeParse(request);
+
+		if (!parsedRequest.success) {
+			console.error("Invalid request:", parsedRequest.error);
+
+			const errorMessage = parsedRequest.error.errors.map((err) => err.message).join(", ");
+			throw new ValidationError(`Invalid request: ${errorMessage}`);
+		}
+
+		return baseApiService.get(
+			`/api/v1/searchspaces/${parsedRequest.data.search_space_id}/members`,
+			getMembersResponse
+		);
+	};
+
+	/**
+	 * Update a member's role
+	 */
+	updateMember = async (request: UpdateMembershipRequest) => {
+		const parsedRequest = updateMembershipRequest.safeParse(request);
+
+		if (!parsedRequest.success) {
+			console.error("Invalid request:", parsedRequest.error);
+
+			const errorMessage = parsedRequest.error.errors.map((err) => err.message).join(", ");
+			throw new ValidationError(`Invalid request: ${errorMessage}`);
+		}
+
+		return baseApiService.put(
+			`/api/v1/searchspaces/${parsedRequest.data.search_space_id}/members/${parsedRequest.data.membership_id}`,
+			updateMembershipResponse,
+			{
+				body: parsedRequest.data.data,
+			}
+		);
+	};
+
+	/**
+	 * Delete a member from search space
+	 */
+	deleteMember = async (request: DeleteMembershipRequest) => {
+		const parsedRequest = deleteMembershipRequest.safeParse(request);
+
+		if (!parsedRequest.success) {
+			console.error("Invalid request:", parsedRequest.error);
+
+			const errorMessage = parsedRequest.error.errors.map((err) => err.message).join(", ");
+			throw new ValidationError(`Invalid request: ${errorMessage}`);
+		}
+
+		return baseApiService.delete(
+			`/api/v1/searchspaces/${parsedRequest.data.search_space_id}/members/${parsedRequest.data.membership_id}`,
+			deleteMembershipResponse
+		);
+	};
+
+	/**
+	 * Leave a search space (remove self)
+	 */
+	leaveSearchSpace = async (request: LeaveSearchSpaceRequest) => {
+		const parsedRequest = leaveSearchSpaceRequest.safeParse(request);
+
+		if (!parsedRequest.success) {
+			console.error("Invalid request:", parsedRequest.error);
+
+			const errorMessage = parsedRequest.error.errors.map((err) => err.message).join(", ");
+			throw new ValidationError(`Invalid request: ${errorMessage}`);
+		}
+
+		return baseApiService.delete(
+			`/api/v1/searchspaces/${parsedRequest.data.search_space_id}/members/me`,
+			leaveSearchSpaceResponse
+		);
+	};
+
+	/**
+	 * Get current user's access information for a search space
+	 */
+	getMyAccess = async (request: GetMyAccessRequest) => {
+		const parsedRequest = getMyAccessRequest.safeParse(request);
+
+		if (!parsedRequest.success) {
+			console.error("Invalid request:", parsedRequest.error);
+
+			const errorMessage = parsedRequest.error.errors.map((err) => err.message).join(", ");
+			throw new ValidationError(`Invalid request: ${errorMessage}`);
+		}
+
+		return baseApiService.get(
+			`/api/v1/searchspaces/${parsedRequest.data.search_space_id}/my-access`,
+			getMyAccessResponse
+		);
+	};
+}
+
+export const membersApiService = new MembersApiService();
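
Both services follow the same contract-first pattern: requests are validated with zod before any network call, so malformed input surfaces as a typed ValidationError rather than a failed HTTP request. Callers can branch on that; a sketch (helper name hypothetical):

import { membersApiService } from "@/lib/apis/members-api.service";
import { ValidationError } from "@/lib/error";

export async function tryGetMembers(searchSpaceId: number) {
	try {
		return await membersApiService.getMembers({ search_space_id: searchSpaceId });
	} catch (error) {
		if (error instanceof ValidationError) {
			// The request never left the client; treat it as a local input error.
			console.warn("Invalid request shape:", error.message);
			return [];
		}
		throw error;
	}
}
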
diff --git a/surfsense_web/lib/query-client/cache-keys.ts b/surfsense_web/lib/query-client/cache-keys.ts
index db7af6636..a8eecb605 100644
--- a/surfsense_web/lib/query-client/cache-keys.ts
+++ b/surfsense_web/lib/query-client/cache-keys.ts
@@ -1,6 +1,7 @@
 import type { GetChatsRequest } from "@/contracts/types/chat.types";
 import type { GetDocumentsRequest } from "@/contracts/types/document.types";
 import type { GetLLMConfigsRequest } from "@/contracts/types/llm-config.types";
+import type { GetMembersRequest } from "@/contracts/types/members.types";
 import type { GetPodcastsRequest } from "@/contracts/types/podcast.types";
 import type { GetRolesRequest } from "@/contracts/types/roles.types";
 import type { GetSearchSpacesRequest } from "@/contracts/types/search-space.types";
@@ -52,4 +53,12 @@
 	permissions: {
 		all: () => ["permissions"] as const,
 	},
+	members: {
+		all: (searchSpaceId: string) => ["members", searchSpaceId] as const,
+		myAccess: (searchSpaceId: string) => ["members", "my-access", searchSpaceId] as const,
+	},
+	invites: {
+		all: (searchSpaceId: string) => ["invites", searchSpaceId] as const,
+		info: (inviteCode: string) => ["invites", "info", inviteCode] as const,
+	},
 };