diff --git a/surfsense_backend/alembic/versions/23_associate_connectors_with_search_spaces.py b/surfsense_backend/alembic/versions/23_associate_connectors_with_search_spaces.py index 20e9d7840..a693b9ec5 100644 --- a/surfsense_backend/alembic/versions/23_associate_connectors_with_search_spaces.py +++ b/surfsense_backend/alembic/versions/23_associate_connectors_with_search_spaces.py @@ -2,7 +2,6 @@ Revision ID: '23' Revises: '22' -Create Date: 2025-01-10 12:00:00.000000 """ diff --git a/surfsense_backend/alembic/versions/24_fix_null_chat_types.py b/surfsense_backend/alembic/versions/24_fix_null_chat_types.py index 35313d27b..e0d371f1e 100644 --- a/surfsense_backend/alembic/versions/24_fix_null_chat_types.py +++ b/surfsense_backend/alembic/versions/24_fix_null_chat_types.py @@ -2,7 +2,6 @@ Revision ID: 24 Revises: 23 -Create Date: 2025-01-10 14:00:00.000000 """ diff --git a/surfsense_backend/alembic/versions/25_migrate_llm_configs_to_search_spaces.py b/surfsense_backend/alembic/versions/25_migrate_llm_configs_to_search_spaces.py index 116a3c687..c9966599c 100644 --- a/surfsense_backend/alembic/versions/25_migrate_llm_configs_to_search_spaces.py +++ b/surfsense_backend/alembic/versions/25_migrate_llm_configs_to_search_spaces.py @@ -2,7 +2,6 @@ Revision ID: 25 Revises: 24 -Create Date: 2025-01-10 14:00:00.000000 Changes: 1. Migrate llm_configs from user association to search_space association diff --git a/surfsense_backend/alembic/versions/26_add_language_column_to_llm_configs.py b/surfsense_backend/alembic/versions/26_add_language_column_to_llm_configs.py new file mode 100644 index 000000000..e5cdc37d7 --- /dev/null +++ b/surfsense_backend/alembic/versions/26_add_language_column_to_llm_configs.py @@ -0,0 +1,69 @@ +"""Add language column to llm_configs + +Revision ID: 26 +Revises: 25 + +Changes: +1. Add language column to llm_configs table with default value of 'English' +""" + +from collections.abc import Sequence + +import sqlalchemy as sa + +from alembic import op + +# revision identifiers, used by Alembic. +revision: str = "26" +down_revision: str | None = "25" +branch_labels: str | Sequence[str] | None = None +depends_on: str | Sequence[str] | None = None + + +def upgrade() -> None: + """Add language column to llm_configs table.""" + + from sqlalchemy import inspect + + conn = op.get_bind() + inspector = inspect(conn) + + # Get existing columns + llm_config_columns = [col["name"] for col in inspector.get_columns("llm_configs")] + + # Add language column if it doesn't exist + if "language" not in llm_config_columns: + op.add_column( + "llm_configs", + sa.Column( + "language", + sa.String(length=50), + nullable=True, + server_default="English", + ), + ) + + # Update existing rows to have 'English' as default + op.execute( + """ + UPDATE llm_configs + SET language = 'English' + WHERE language IS NULL + """ + ) + + +def downgrade() -> None: + """Remove language column from llm_configs table.""" + + from sqlalchemy import inspect + + conn = op.get_bind() + inspector = inspect(conn) + + # Get existing columns + llm_config_columns = [col["name"] for col in inspector.get_columns("llm_configs")] + + # Drop language column if it exists + if "language" in llm_config_columns: + op.drop_column("llm_configs", "language") diff --git a/surfsense_backend/app/agents/researcher/configuration.py b/surfsense_backend/app/agents/researcher/configuration.py index 3e81a59c0..24d8c819e 100644 --- a/surfsense_backend/app/agents/researcher/configuration.py +++ b/surfsense_backend/app/agents/researcher/configuration.py @@ -37,6 +37,7 @@ class Configuration: search_mode: SearchMode research_mode: ResearchMode document_ids_to_add_in_context: list[int] + language: str | None = None @classmethod def from_runnable_config( diff --git a/surfsense_backend/app/agents/researcher/nodes.py b/surfsense_backend/app/agents/researcher/nodes.py index 0835fb861..fe869c265 100644 --- a/surfsense_backend/app/agents/researcher/nodes.py +++ b/surfsense_backend/app/agents/researcher/nodes.py @@ -578,6 +578,7 @@ async def write_answer_outline( num_sections = configuration.num_sections user_id = configuration.user_id search_space_id = configuration.search_space_id + language = configuration.language # Get language from configuration writer( { @@ -628,7 +629,7 @@ async def write_answer_outline( # Create messages for the LLM messages = [ - SystemMessage(content=get_answer_outline_system_prompt()), + SystemMessage(content=get_answer_outline_system_prompt(language=language)), HumanMessage(content=human_message_content), ] @@ -2000,6 +2001,7 @@ async def handle_qna_workflow( "relevant_documents": all_documents, # Use combined documents "user_id": configuration.user_id, "search_space_id": configuration.search_space_id, + "language": configuration.language, } } diff --git a/surfsense_backend/app/agents/researcher/prompts.py b/surfsense_backend/app/agents/researcher/prompts.py index 44b218913..825772a24 100644 --- a/surfsense_backend/app/agents/researcher/prompts.py +++ b/surfsense_backend/app/agents/researcher/prompts.py @@ -1,9 +1,18 @@ import datetime -def get_answer_outline_system_prompt(): +def _build_language_instruction(language: str | None = None): + if language: + return f"\n\nIMPORTANT: Please respond in {language} language. All your responses, explanations, and analysis should be written in {language}." + return "" + + +def get_answer_outline_system_prompt(language: str | None = None) -> str: + language_instruction = _build_language_instruction(language) + return f""" Today's date: {datetime.datetime.now().strftime("%Y-%m-%d")} +{language_instruction} You are an expert research assistant specializing in structuring information. Your task is to create a detailed and logical research outline based on the user's query. This outline will serve as the blueprint for generating a comprehensive research report. diff --git a/surfsense_backend/app/agents/researcher/qna_agent/configuration.py b/surfsense_backend/app/agents/researcher/qna_agent/configuration.py index 5a4529e0d..ea107a575 100644 --- a/surfsense_backend/app/agents/researcher/qna_agent/configuration.py +++ b/surfsense_backend/app/agents/researcher/qna_agent/configuration.py @@ -20,6 +20,7 @@ class Configuration: ] # Documents provided directly to the agent for answering user_id: str # User identifier search_space_id: int # Search space identifier + language: str | None = None # Language for responses @classmethod def from_runnable_config( diff --git a/surfsense_backend/app/agents/researcher/qna_agent/nodes.py b/surfsense_backend/app/agents/researcher/qna_agent/nodes.py index fd6861efb..c4e79d685 100644 --- a/surfsense_backend/app/agents/researcher/qna_agent/nodes.py +++ b/surfsense_backend/app/agents/researcher/qna_agent/nodes.py @@ -102,7 +102,7 @@ async def answer_question(state: State, config: RunnableConfig) -> dict[str, Any user_query = configuration.user_query user_id = configuration.user_id search_space_id = configuration.search_space_id - + language = configuration.language # Get user's fast LLM llm = await get_user_fast_llm(state.db_session, user_id, search_space_id) if not llm: @@ -127,7 +127,9 @@ async def answer_question(state: State, config: RunnableConfig) -> dict[str, Any """ # Use initial system prompt for token calculation - initial_system_prompt = get_qna_citation_system_prompt(chat_history_str) + initial_system_prompt = get_qna_citation_system_prompt( + chat_history_str, language + ) base_messages = [ SystemMessage(content=initial_system_prompt), HumanMessage(content=base_human_message_template), @@ -146,9 +148,9 @@ async def answer_question(state: State, config: RunnableConfig) -> dict[str, Any # Choose system prompt based on final document availability system_prompt = ( - get_qna_citation_system_prompt(chat_history_str) + get_qna_citation_system_prompt(chat_history_str, language) if has_documents - else get_qna_no_documents_system_prompt(chat_history_str) + else get_qna_no_documents_system_prompt(chat_history_str, language) ) # Generate documents section diff --git a/surfsense_backend/app/agents/researcher/qna_agent/prompts.py b/surfsense_backend/app/agents/researcher/qna_agent/prompts.py index 212788804..9c35f90cc 100644 --- a/surfsense_backend/app/agents/researcher/qna_agent/prompts.py +++ b/surfsense_backend/app/agents/researcher/qna_agent/prompts.py @@ -1,7 +1,11 @@ import datetime +from ..prompts import _build_language_instruction -def get_qna_citation_system_prompt(chat_history: str | None = None): + +def get_qna_citation_system_prompt( + chat_history: str | None = None, language: str | None = None +): chat_history_section = ( f""" @@ -16,9 +20,11 @@ NO CHAT HISTORY PROVIDED """ ) + # Add language instruction if specified + language_instruction = _build_language_instruction(language) return f""" Today's date: {datetime.datetime.now().strftime("%Y-%m-%d")} -You are SurfSense, an advanced AI research assistant that provides detailed, well-researched answers to user questions by synthesizing information from multiple personal knowledge sources. +You are SurfSense, an advanced AI research assistant that provides detailed, well-researched answers to user questions by synthesizing information from multiple personal knowledge sources.{language_instruction} {chat_history_section} - EXTENSION: "Web content saved via SurfSense browser extension" (personal browsing history) @@ -149,7 +155,9 @@ Make sure your response: """ -def get_qna_no_documents_system_prompt(chat_history: str | None = None): +def get_qna_no_documents_system_prompt( + chat_history: str | None = None, language: str | None = None +): chat_history_section = ( f""" @@ -164,9 +172,12 @@ NO CHAT HISTORY PROVIDED """ ) + # Add language instruction if specified + language_instruction = _build_language_instruction(language) + return f""" Today's date: {datetime.datetime.now().strftime("%Y-%m-%d")} -You are SurfSense, an advanced AI research assistant that provides helpful, detailed answers to user questions in a conversational manner. +You are SurfSense, an advanced AI research assistant that provides helpful, detailed answers to user questions in a conversational manner.{language_instruction} {chat_history_section} The user has asked a question but there are no specific documents from their personal knowledge base available to answer it. You should provide a helpful response based on: diff --git a/surfsense_backend/app/agents/researcher/sub_section_writer/prompts.py b/surfsense_backend/app/agents/researcher/sub_section_writer/prompts.py index c3d487671..3c34eb474 100644 --- a/surfsense_backend/app/agents/researcher/sub_section_writer/prompts.py +++ b/surfsense_backend/app/agents/researcher/sub_section_writer/prompts.py @@ -1,7 +1,11 @@ import datetime +from ..prompts import _build_language_instruction -def get_citation_system_prompt(chat_history: str | None = None): + +def get_citation_system_prompt( + chat_history: str | None = None, language: str | None = None +): chat_history_section = ( f""" @@ -16,9 +20,12 @@ NO CHAT HISTORY PROVIDED """ ) + # Add language instruction if specified + language_instruction = _build_language_instruction(language) + return f""" Today's date: {datetime.datetime.now().strftime("%Y-%m-%d")} -You are SurfSense, an advanced AI research assistant that synthesizes information from multiple knowledge sources to provide comprehensive, well-cited answers to user queries. +You are SurfSense, an advanced AI research assistant that synthesizes information from multiple knowledge sources to provide comprehensive, well-cited answers to user queries.{language_instruction} {chat_history_section} - EXTENSION: "Web content saved via SurfSense browser extension" (personal browsing history) @@ -156,7 +163,9 @@ Make sure your response: """ -def get_no_documents_system_prompt(chat_history: str | None = None): +def get_no_documents_system_prompt( + chat_history: str | None = None, language: str | None = None +): chat_history_section = ( f""" @@ -171,9 +180,12 @@ NO CHAT HISTORY PROVIDED """ ) + # Add language instruction if specified + language_instruction = _build_language_instruction(language) + return f""" Today's date: {datetime.datetime.now().strftime("%Y-%m-%d")} -You are SurfSense, an advanced AI research assistant that helps users create well-structured content for their documents and research. +You are SurfSense, an advanced AI research assistant that helps users create well-structured content for their documents and research.{language_instruction} {chat_history_section} You are writing content for a specific sub-section of a document. No specific documents from the user's personal knowledge base are available, so you should create content based on: diff --git a/surfsense_backend/app/db.py b/surfsense_backend/app/db.py index eb33145cf..e476ed8e5 100644 --- a/surfsense_backend/app/db.py +++ b/surfsense_backend/app/db.py @@ -296,6 +296,8 @@ class LLMConfig(BaseModel, TimestampMixin): api_key = Column(String, nullable=False) api_base = Column(String(500), nullable=True) + language = Column(String(50), nullable=True, default="English") + # For any other parameters that litellm supports litellm_params = Column(JSON, nullable=True, default={}) diff --git a/surfsense_backend/app/routes/chats_routes.py b/surfsense_backend/app/routes/chats_routes.py index e4d02686f..e003dc260 100644 --- a/surfsense_backend/app/routes/chats_routes.py +++ b/surfsense_backend/app/routes/chats_routes.py @@ -4,8 +4,9 @@ from langchain.schema import AIMessage, HumanMessage from sqlalchemy.exc import IntegrityError, OperationalError from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.future import select +from sqlalchemy.orm import selectinload -from app.db import Chat, SearchSpace, User, get_async_session +from app.db import Chat, SearchSpace, User, UserSearchSpacePreference, get_async_session from app.schemas import ( AISDKChatRequest, ChatCreate, @@ -53,10 +54,51 @@ async def handle_chat_data( request_data.get("document_ids_to_add_in_context") ) search_mode_str = validate_search_mode(request_data.get("search_mode")) + # print("RESQUEST DATA:", request_data) + # print("SELECTED CONNECTORS:", selected_connectors) # Check if the search space belongs to the current user try: await check_ownership(session, SearchSpace, search_space_id, user) + language_result = await session.execute( + select(UserSearchSpacePreference) + .options( + selectinload(UserSearchSpacePreference.search_space).selectinload( + SearchSpace.llm_configs + ), + selectinload(UserSearchSpacePreference.long_context_llm), + selectinload(UserSearchSpacePreference.fast_llm), + selectinload(UserSearchSpacePreference.strategic_llm), + ) + .filter( + UserSearchSpacePreference.search_space_id == search_space_id, + UserSearchSpacePreference.user_id == user.id, + ) + ) + user_preference = language_result.scalars().first() + # print("UserSearchSpacePreference:", user_preference) + + language = None + if ( + user_preference + and user_preference.search_space + and user_preference.search_space.llm_configs + ): + llm_configs = user_preference.search_space.llm_configs + + for preferred_llm in [ + user_preference.fast_llm, + user_preference.long_context_llm, + user_preference.strategic_llm, + ]: + if preferred_llm and getattr(preferred_llm, "language", None): + language = preferred_llm.language + break + + if not language: + first_llm_config = llm_configs[0] + language = getattr(first_llm_config, "language", None) + except HTTPException: raise HTTPException( status_code=403, detail="You don't have access to this search space" @@ -80,6 +122,7 @@ async def handle_chat_data( langchain_chat_history, search_mode_str, document_ids_to_add_in_context, + language, ) ) diff --git a/surfsense_backend/app/routes/llm_config_routes.py b/surfsense_backend/app/routes/llm_config_routes.py index 63d540d2c..ec8ea5846 100644 --- a/surfsense_backend/app/routes/llm_config_routes.py +++ b/surfsense_backend/app/routes/llm_config_routes.py @@ -300,6 +300,9 @@ async def update_user_llm_preferences( # Validate that all provided LLM config IDs belong to the search space update_data = preferences.model_dump(exclude_unset=True) + # Store language from configs to validate consistency + languages = set() + for _key, llm_config_id in update_data.items(): if llm_config_id is not None: # Verify the LLM config belongs to the search space @@ -316,6 +319,16 @@ async def update_user_llm_preferences( detail=f"LLM configuration {llm_config_id} not found in this search space", ) + # Collect language for consistency check + languages.add(llm_config.language) + + # Check if all selected LLM configs have the same language + if len(languages) > 1: + raise HTTPException( + status_code=400, + detail="All selected LLM configurations must have the same language setting", + ) + # Update user preferences for key, value in update_data.items(): setattr(preference, key, value) diff --git a/surfsense_backend/app/schemas/llm_config.py b/surfsense_backend/app/schemas/llm_config.py index 8beb65347..285c15665 100644 --- a/surfsense_backend/app/schemas/llm_config.py +++ b/surfsense_backend/app/schemas/llm_config.py @@ -26,6 +26,9 @@ class LLMConfigBase(BaseModel): litellm_params: dict[str, Any] | None = Field( default=None, description="Additional LiteLLM parameters" ) + language: str | None = Field( + default="English", max_length=50, description="Language for the LLM" + ) class LLMConfigCreate(LLMConfigBase): @@ -49,6 +52,9 @@ class LLMConfigUpdate(BaseModel): api_base: str | None = Field( None, max_length=500, description="Optional API base URL" ) + language: str | None = Field( + None, max_length=50, description="Language for the LLM" + ) litellm_params: dict[str, Any] | None = Field( None, description="Additional LiteLLM parameters" ) diff --git a/surfsense_backend/app/tasks/stream_connector_search_results.py b/surfsense_backend/app/tasks/stream_connector_search_results.py index ead6a89e7..dd1ae4ce5 100644 --- a/surfsense_backend/app/tasks/stream_connector_search_results.py +++ b/surfsense_backend/app/tasks/stream_connector_search_results.py @@ -20,6 +20,7 @@ async def stream_connector_search_results( langchain_chat_history: list[Any], search_mode_str: str, document_ids_to_add_in_context: list[int], + language: str | None = None, ) -> AsyncGenerator[str, None]: """ Stream connector search results to the client @@ -66,8 +67,10 @@ async def stream_connector_search_results( "search_mode": search_mode, "research_mode": research_mode, "document_ids_to_add_in_context": document_ids_to_add_in_context, + "language": language, # Add language to the configuration } } + # print(f"Researcher configuration: {config['configurable']}") # Debug print # Initialize state with database session and streaming service initial_state = State( db_session=session, diff --git a/surfsense_web/components/onboard/add-provider-step.tsx b/surfsense_web/components/onboard/add-provider-step.tsx index 9b70c8d7f..6517fe0a1 100644 --- a/surfsense_web/components/onboard/add-provider-step.tsx +++ b/surfsense_web/components/onboard/add-provider-step.tsx @@ -18,6 +18,7 @@ import { SelectValue, } from "@/components/ui/select"; import { LLM_PROVIDERS } from "@/contracts/enums/llm-providers"; +import { LANGUAGES } from "@/contracts/enums/languages"; import { type CreateLLMConfig, useLLMConfigs } from "@/hooks/use-llm-configs"; import InferenceParamsEditor from "../inference-params-editor"; @@ -42,6 +43,7 @@ export function AddProviderStep({ model_name: "", api_key: "", api_base: "", + language: "English", litellm_params: {}, search_space_id: searchSpaceId, }); @@ -70,6 +72,7 @@ export function AddProviderStep({ model_name: "", api_key: "", api_base: "", + language: "English", litellm_params: {}, search_space_id: searchSpaceId, }); @@ -119,6 +122,7 @@ export function AddProviderStep({

Model: {config.model_name} + {config.language && ` • Language: ${config.language}`} {config.api_base && ` • Base: ${config.api_base}`}

@@ -169,7 +173,7 @@ export function AddProviderStep({
-
+
+ + {/* language */} +
+ + +
+
{formData.provider === "CUSTOM" && ( diff --git a/surfsense_web/components/settings/model-config-manager.tsx b/surfsense_web/components/settings/model-config-manager.tsx index 7384337b6..5719b01d1 100644 --- a/surfsense_web/components/settings/model-config-manager.tsx +++ b/surfsense_web/components/settings/model-config-manager.tsx @@ -38,6 +38,7 @@ import { SelectValue, } from "@/components/ui/select"; import { LLM_PROVIDERS } from "@/contracts/enums/llm-providers"; +import { LANGUAGES } from "@/contracts/enums/languages"; import { type CreateLLMConfig, type LLMConfig, useLLMConfigs } from "@/hooks/use-llm-configs"; import InferenceParamsEditor from "../inference-params-editor"; @@ -65,6 +66,7 @@ export function ModelConfigManager({ searchSpaceId }: ModelConfigManagerProps) { model_name: "", api_key: "", api_base: "", + language: "English", litellm_params: {}, search_space_id: searchSpaceId, }); @@ -80,6 +82,7 @@ export function ModelConfigManager({ searchSpaceId }: ModelConfigManagerProps) { model_name: editingConfig.model_name, api_key: editingConfig.api_key, api_base: editingConfig.api_base || "", + language: editingConfig.language || "English", litellm_params: editingConfig.litellm_params || {}, search_space_id: searchSpaceId, }); @@ -118,6 +121,7 @@ export function ModelConfigManager({ searchSpaceId }: ModelConfigManagerProps) { model_name: "", api_key: "", api_base: "", + language: "English", litellm_params: {}, search_space_id: searchSpaceId, }); @@ -323,6 +327,13 @@ export function ModelConfigManager({ searchSpaceId }: ModelConfigManagerProps) {

{config.model_name}

+ {config.language && ( +
+ + {config.language} + +
+ )}
@@ -432,6 +443,7 @@ export function ModelConfigManager({ searchSpaceId }: ModelConfigManagerProps) { model_name: "", api_key: "", api_base: "", + language: "", litellm_params: {}, search_space_id: searchSpaceId, }); @@ -524,6 +536,25 @@ export function ModelConfigManager({ searchSpaceId }: ModelConfigManagerProps) { )} +
+ + +
+
; created_at: string; search_space_id: number; @@ -31,6 +32,7 @@ export interface CreateLLMConfig { model_name: string; api_key: string; api_base?: string; + language?: string; litellm_params?: Record; search_space_id: number; }