diff --git a/surfsense_backend/app/agents/researcher/configuration.py b/surfsense_backend/app/agents/researcher/configuration.py index 3e81a59c0..e8ee856de 100644 --- a/surfsense_backend/app/agents/researcher/configuration.py +++ b/surfsense_backend/app/agents/researcher/configuration.py @@ -37,6 +37,8 @@ class Configuration: search_mode: SearchMode research_mode: ResearchMode document_ids_to_add_in_context: list[int] + language: str | None = None + @classmethod def from_runnable_config( diff --git a/surfsense_backend/app/agents/researcher/nodes.py b/surfsense_backend/app/agents/researcher/nodes.py index 0835fb861..fe869c265 100644 --- a/surfsense_backend/app/agents/researcher/nodes.py +++ b/surfsense_backend/app/agents/researcher/nodes.py @@ -578,6 +578,7 @@ async def write_answer_outline( num_sections = configuration.num_sections user_id = configuration.user_id search_space_id = configuration.search_space_id + language = configuration.language # Get language from configuration writer( { @@ -628,7 +629,7 @@ async def write_answer_outline( # Create messages for the LLM messages = [ - SystemMessage(content=get_answer_outline_system_prompt()), + SystemMessage(content=get_answer_outline_system_prompt(language=language)), HumanMessage(content=human_message_content), ] @@ -2000,6 +2001,7 @@ async def handle_qna_workflow( "relevant_documents": all_documents, # Use combined documents "user_id": configuration.user_id, "search_space_id": configuration.search_space_id, + "language": configuration.language, } } diff --git a/surfsense_backend/app/agents/researcher/prompts.py b/surfsense_backend/app/agents/researcher/prompts.py index 44b218913..b7265602a 100644 --- a/surfsense_backend/app/agents/researcher/prompts.py +++ b/surfsense_backend/app/agents/researcher/prompts.py @@ -1,9 +1,14 @@ import datetime -def get_answer_outline_system_prompt(): +def get_answer_outline_system_prompt(language: str | None = None) -> str: + language_instruction = "" + if language: + 
language_instruction = f"\n\nIMPORTANT: Please respond in {language} language. All your responses, explanations, and analysis should be written in {language}." + return f""" Today's date: {datetime.datetime.now().strftime("%Y-%m-%d")} +{language_instruction} You are an expert research assistant specializing in structuring information. Your task is to create a detailed and logical research outline based on the user's query. This outline will serve as the blueprint for generating a comprehensive research report. diff --git a/surfsense_backend/app/agents/researcher/qna_agent/configuration.py b/surfsense_backend/app/agents/researcher/qna_agent/configuration.py index 5a4529e0d..ea107a575 100644 --- a/surfsense_backend/app/agents/researcher/qna_agent/configuration.py +++ b/surfsense_backend/app/agents/researcher/qna_agent/configuration.py @@ -20,6 +20,7 @@ class Configuration: ] # Documents provided directly to the agent for answering user_id: str # User identifier search_space_id: int # Search space identifier + language: str | None = None # Language for responses @classmethod def from_runnable_config( diff --git a/surfsense_backend/app/agents/researcher/qna_agent/nodes.py b/surfsense_backend/app/agents/researcher/qna_agent/nodes.py index fd6861efb..20374b706 100644 --- a/surfsense_backend/app/agents/researcher/qna_agent/nodes.py +++ b/surfsense_backend/app/agents/researcher/qna_agent/nodes.py @@ -102,7 +102,7 @@ async def answer_question(state: State, config: RunnableConfig) -> dict[str, Any user_query = configuration.user_query user_id = configuration.user_id search_space_id = configuration.search_space_id - + language = configuration.language # Get user's fast LLM llm = await get_user_fast_llm(state.db_session, user_id, search_space_id) if not llm: @@ -127,7 +127,7 @@ async def answer_question(state: State, config: RunnableConfig) -> dict[str, Any """ # Use initial system prompt for token calculation - initial_system_prompt = 
get_qna_citation_system_prompt(chat_history_str) + initial_system_prompt = get_qna_citation_system_prompt(chat_history_str, language) base_messages = [ SystemMessage(content=initial_system_prompt), HumanMessage(content=base_human_message_template), @@ -146,9 +146,9 @@ async def answer_question(state: State, config: RunnableConfig) -> dict[str, Any # Choose system prompt based on final document availability system_prompt = ( - get_qna_citation_system_prompt(chat_history_str) + get_qna_citation_system_prompt(chat_history_str, language) if has_documents - else get_qna_no_documents_system_prompt(chat_history_str) + else get_qna_no_documents_system_prompt(chat_history_str, language) ) # Generate documents section diff --git a/surfsense_backend/app/agents/researcher/qna_agent/prompts.py b/surfsense_backend/app/agents/researcher/qna_agent/prompts.py index 212788804..deb5dd59f 100644 --- a/surfsense_backend/app/agents/researcher/qna_agent/prompts.py +++ b/surfsense_backend/app/agents/researcher/qna_agent/prompts.py @@ -1,7 +1,7 @@ import datetime -def get_qna_citation_system_prompt(chat_history: str | None = None): +def get_qna_citation_system_prompt(chat_history: str | None = None, language: str | None = None): chat_history_section = ( f""" @@ -15,10 +15,15 @@ NO CHAT HISTORY PROVIDED """ ) + + # Add language instruction if specified + language_instruction = "" + if language: + language_instruction = f"\n\nIMPORTANT: Please respond in {language} language. All your responses, explanations, and analysis should be written in {language}." return f""" Today's date: {datetime.datetime.now().strftime("%Y-%m-%d")} -You are SurfSense, an advanced AI research assistant that provides detailed, well-researched answers to user questions by synthesizing information from multiple personal knowledge sources. 
+You are SurfSense, an advanced AI research assistant that provides detailed, well-researched answers to user questions by synthesizing information from multiple personal knowledge sources.{language_instruction} {chat_history_section} - EXTENSION: "Web content saved via SurfSense browser extension" (personal browsing history) @@ -149,7 +154,7 @@ Make sure your response: """ -def get_qna_no_documents_system_prompt(chat_history: str | None = None): +def get_qna_no_documents_system_prompt(chat_history: str | None = None, language: str | None = None): chat_history_section = ( f""" @@ -163,10 +168,15 @@ NO CHAT HISTORY PROVIDED """ ) + + # Add language instruction if specified + language_instruction = "" + if language: + language_instruction = f"\n\nIMPORTANT: Please respond in {language} language. All your responses, explanations, and analysis should be written in {language}." return f""" Today's date: {datetime.datetime.now().strftime("%Y-%m-%d")} -You are SurfSense, an advanced AI research assistant that provides helpful, detailed answers to user questions in a conversational manner. +You are SurfSense, an advanced AI research assistant that provides helpful, detailed answers to user questions in a conversational manner.{language_instruction} {chat_history_section} The user has asked a question but there are no specific documents from their personal knowledge base available to answer it. 
You should provide a helpful response based on: diff --git a/surfsense_backend/app/agents/researcher/sub_section_writer/prompts.py b/surfsense_backend/app/agents/researcher/sub_section_writer/prompts.py index c3d487671..3954d47e5 100644 --- a/surfsense_backend/app/agents/researcher/sub_section_writer/prompts.py +++ b/surfsense_backend/app/agents/researcher/sub_section_writer/prompts.py @@ -1,7 +1,7 @@ import datetime -def get_citation_system_prompt(chat_history: str | None = None): +def get_citation_system_prompt(chat_history: str | None = None, language: str | None = None): chat_history_section = ( f""" @@ -15,10 +15,15 @@ NO CHAT HISTORY PROVIDED """ ) + + # Add language instruction if specified + language_instruction = "" + if language: + language_instruction = f"\n\nIMPORTANT: Please respond in {language} language. All your responses, explanations, and analysis should be written in {language}." return f""" Today's date: {datetime.datetime.now().strftime("%Y-%m-%d")} -You are SurfSense, an advanced AI research assistant that synthesizes information from multiple knowledge sources to provide comprehensive, well-cited answers to user queries. +You are SurfSense, an advanced AI research assistant that synthesizes information from multiple knowledge sources to provide comprehensive, well-cited answers to user queries.{language_instruction} {chat_history_section} - EXTENSION: "Web content saved via SurfSense browser extension" (personal browsing history) @@ -156,7 +161,7 @@ Make sure your response: """ -def get_no_documents_system_prompt(chat_history: str | None = None): +def get_no_documents_system_prompt(chat_history: str | None = None, language: str | None = None): chat_history_section = ( f""" @@ -170,10 +175,15 @@ NO CHAT HISTORY PROVIDED """ ) + + # Add language instruction if specified + language_instruction = "" + if language: + language_instruction = f"\n\nIMPORTANT: Please respond in {language} language. 
All your responses, explanations, and analysis should be written in {language}." return f""" Today's date: {datetime.datetime.now().strftime("%Y-%m-%d")} -You are SurfSense, an advanced AI research assistant that helps users create well-structured content for their documents and research. +You are SurfSense, an advanced AI research assistant that helps users create well-structured content for their documents and research.{language_instruction} {chat_history_section} You are writing content for a specific sub-section of a document. No specific documents from the user's personal knowledge base are available, so you should create content based on: diff --git a/surfsense_backend/app/db.py b/surfsense_backend/app/db.py index eb33145cf..e476ed8e5 100644 --- a/surfsense_backend/app/db.py +++ b/surfsense_backend/app/db.py @@ -296,6 +296,8 @@ class LLMConfig(BaseModel, TimestampMixin): api_key = Column(String, nullable=False) api_base = Column(String(500), nullable=True) + language = Column(String(50), nullable=True, default="English") + # For any other parameters that litellm supports litellm_params = Column(JSON, nullable=True, default={}) diff --git a/surfsense_backend/app/routes/chats_routes.py b/surfsense_backend/app/routes/chats_routes.py index e4d02686f..d30bf5451 100644 --- a/surfsense_backend/app/routes/chats_routes.py +++ b/surfsense_backend/app/routes/chats_routes.py @@ -4,8 +4,10 @@ from langchain.schema import AIMessage, HumanMessage from sqlalchemy.exc import IntegrityError, OperationalError from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.future import select +from sqlalchemy.orm import selectinload -from app.db import Chat, SearchSpace, User, get_async_session + +from app.db import Chat, SearchSpace, User, UserSearchSpacePreference, get_async_session from app.schemas import ( AISDKChatRequest, ChatCreate, @@ -53,21 +55,60 @@ async def handle_chat_data( request_data.get("document_ids_to_add_in_context") ) search_mode_str = 
validate_search_mode(request_data.get("search_mode")) + # print("REQUEST DATA:", request_data) + # print("SELECTED CONNECTORS:", selected_connectors) # Check if the search space belongs to the current user try: await check_ownership(session, SearchSpace, search_space_id, user) + language_result = await session.execute( + select(UserSearchSpacePreference) + .options( + selectinload(UserSearchSpacePreference.search_space).selectinload(SearchSpace.llm_configs), + selectinload(UserSearchSpacePreference.long_context_llm), + selectinload(UserSearchSpacePreference.fast_llm), + selectinload(UserSearchSpacePreference.strategic_llm) + ) + .filter( + UserSearchSpacePreference.search_space_id == search_space_id, + UserSearchSpacePreference.user_id == user.id + ) + ) + user_preference = language_result.scalars().first() + print("UserSearchSpacePreference:", user_preference) + + language = None + if user_preference and user_preference.search_space and user_preference.search_space.llm_configs: + llm_configs = user_preference.search_space.llm_configs + # print(f"Found {len(llm_configs)} LLM Configs") + # for i, config in enumerate(llm_configs): + # print(f" Config {i+1}: name={config.name}, provider={config.provider}, language={getattr(config, 'language', None)}") + + + for preferred_llm in [user_preference.fast_llm, user_preference.long_context_llm, user_preference.strategic_llm]: + if preferred_llm and getattr(preferred_llm, 'language', None): + language = preferred_llm.language + # print(f"Using language from preferred LLM: {preferred_llm.name} -> {language}") + break + + # No preferred LLM has a language set; use first available LLM config + if not language: + first_llm_config = llm_configs[0] + language = getattr(first_llm_config, 'language', None) + # print(f"Using language from first LLM config: {first_llm_config.name} -> {language}") + except HTTPException: raise HTTPException( status_code=403, detail="You don't have access to this search space" ) from None - + # print("Language 
selected:", language) langchain_chat_history = [] for message in messages[:-1]: if message["role"] == "user": langchain_chat_history.append(HumanMessage(content=message["content"])) elif message["role"] == "assistant": langchain_chat_history.append(AIMessage(content=message["content"])) + response = StreamingResponse( stream_connector_search_results( @@ -80,6 +121,7 @@ async def handle_chat_data( langchain_chat_history, search_mode_str, document_ids_to_add_in_context, + language, ) ) diff --git a/surfsense_backend/app/routes/llm_config_routes.py b/surfsense_backend/app/routes/llm_config_routes.py index 63d540d2c..896f7be41 100644 --- a/surfsense_backend/app/routes/llm_config_routes.py +++ b/surfsense_backend/app/routes/llm_config_routes.py @@ -299,7 +299,10 @@ async def update_user_llm_preferences( # Validate that all provided LLM config IDs belong to the search space update_data = preferences.model_dump(exclude_unset=True) - + + # Store language from configs to validate consistency + languages = set() + for _key, llm_config_id in update_data.items(): if llm_config_id is not None: # Verify the LLM config belongs to the search space @@ -315,6 +318,16 @@ async def update_user_llm_preferences( status_code=404, detail=f"LLM configuration {llm_config_id} not found in this search space", ) + + # Collect language for consistency check + languages.add(llm_config.language) + + # Check if all selected LLM configs have the same language + if len(languages) > 1: + raise HTTPException( + status_code=400, + detail="All selected LLM configurations must have the same language setting", + ) # Update user preferences for key, value in update_data.items(): diff --git a/surfsense_backend/app/schemas/llm_config.py b/surfsense_backend/app/schemas/llm_config.py index 8beb65347..285c15665 100644 --- a/surfsense_backend/app/schemas/llm_config.py +++ b/surfsense_backend/app/schemas/llm_config.py @@ -26,6 +26,9 @@ class LLMConfigBase(BaseModel): litellm_params: dict[str, Any] | None = Field( 
default=None, description="Additional LiteLLM parameters" ) + language: str | None = Field( + default="English", max_length=50, description="Language for the LLM" + ) class LLMConfigCreate(LLMConfigBase): @@ -49,6 +52,9 @@ class LLMConfigUpdate(BaseModel): api_base: str | None = Field( None, max_length=500, description="Optional API base URL" ) + language: str | None = Field( + None, max_length=50, description="Language for the LLM" + ) litellm_params: dict[str, Any] | None = Field( None, description="Additional LiteLLM parameters" ) diff --git a/surfsense_backend/app/tasks/stream_connector_search_results.py b/surfsense_backend/app/tasks/stream_connector_search_results.py index ead6a89e7..dd1ae4ce5 100644 --- a/surfsense_backend/app/tasks/stream_connector_search_results.py +++ b/surfsense_backend/app/tasks/stream_connector_search_results.py @@ -20,6 +20,7 @@ async def stream_connector_search_results( langchain_chat_history: list[Any], search_mode_str: str, document_ids_to_add_in_context: list[int], + language: str | None = None, ) -> AsyncGenerator[str, None]: """ Stream connector search results to the client @@ -66,8 +67,10 @@ async def stream_connector_search_results( "search_mode": search_mode, "research_mode": research_mode, "document_ids_to_add_in_context": document_ids_to_add_in_context, + "language": language, # Add language to the configuration } } + # print(f"Researcher configuration: {config['configurable']}") # Debug print # Initialize state with database session and streaming service initial_state = State( db_session=session, diff --git a/surfsense_web/components/onboard/add-provider-step.tsx b/surfsense_web/components/onboard/add-provider-step.tsx index 9b70c8d7f..6517fe0a1 100644 --- a/surfsense_web/components/onboard/add-provider-step.tsx +++ b/surfsense_web/components/onboard/add-provider-step.tsx @@ -18,6 +18,7 @@ import { SelectValue, } from "@/components/ui/select"; import { LLM_PROVIDERS } from "@/contracts/enums/llm-providers"; +import { 
LANGUAGES } from "@/contracts/enums/languages"; import { type CreateLLMConfig, useLLMConfigs } from "@/hooks/use-llm-configs"; import InferenceParamsEditor from "../inference-params-editor"; @@ -42,6 +43,7 @@ export function AddProviderStep({ model_name: "", api_key: "", api_base: "", + language: "English", litellm_params: {}, search_space_id: searchSpaceId, }); @@ -70,6 +72,7 @@ export function AddProviderStep({ model_name: "", api_key: "", api_base: "", + language: "English", litellm_params: {}, search_space_id: searchSpaceId, }); @@ -119,6 +122,7 @@ export function AddProviderStep({

Model: {config.model_name} + {config.language && ` • Language: ${config.language}`} {config.api_base && ` • Base: ${config.api_base}`}

@@ -169,7 +173,7 @@ export function AddProviderStep({
-
+
+ + {/* language */} +
+ + +
+
{formData.provider === "CUSTOM" && ( diff --git a/surfsense_web/components/settings/model-config-manager.tsx b/surfsense_web/components/settings/model-config-manager.tsx index 7384337b6..a236f1965 100644 --- a/surfsense_web/components/settings/model-config-manager.tsx +++ b/surfsense_web/components/settings/model-config-manager.tsx @@ -38,6 +38,7 @@ import { SelectValue, } from "@/components/ui/select"; import { LLM_PROVIDERS } from "@/contracts/enums/llm-providers"; +import { LANGUAGES } from "@/contracts/enums/languages"; import { type CreateLLMConfig, type LLMConfig, useLLMConfigs } from "@/hooks/use-llm-configs"; import InferenceParamsEditor from "../inference-params-editor"; @@ -65,6 +66,7 @@ export function ModelConfigManager({ searchSpaceId }: ModelConfigManagerProps) { model_name: "", api_key: "", api_base: "", + language: "English", litellm_params: {}, search_space_id: searchSpaceId, }); @@ -80,6 +82,7 @@ export function ModelConfigManager({ searchSpaceId }: ModelConfigManagerProps) { model_name: editingConfig.model_name, api_key: editingConfig.api_key, api_base: editingConfig.api_base || "", + language: editingConfig.language || "English", litellm_params: editingConfig.litellm_params || {}, search_space_id: searchSpaceId, }); @@ -118,6 +121,7 @@ export function ModelConfigManager({ searchSpaceId }: ModelConfigManagerProps) { model_name: "", api_key: "", api_base: "", + language: "English", litellm_params: {}, search_space_id: searchSpaceId, }); @@ -323,6 +327,13 @@ export function ModelConfigManager({ searchSpaceId }: ModelConfigManagerProps) {

{config.model_name}

+ {config.language && ( +
+ + {config.language} + +
+ )}
@@ -432,6 +443,7 @@ export function ModelConfigManager({ searchSpaceId }: ModelConfigManagerProps) { model_name: "", api_key: "", api_base: "", + language: "", litellm_params: {}, search_space_id: searchSpaceId, }); @@ -524,6 +536,25 @@ export function ModelConfigManager({ searchSpaceId }: ModelConfigManagerProps) { )} +
+ + +
+
; created_at: string; search_space_id: number; @@ -31,6 +32,7 @@ export interface CreateLLMConfig { model_name: string; api_key: string; api_base?: string; + language?: string; litellm_params?: Record; search_space_id: number; }