From 045537aa796970fffdefef54d2b3498dc28c149d Mon Sep 17 00:00:00 2001 From: Tarun Date: Sun, 12 Oct 2025 13:13:42 +0530 Subject: [PATCH 1/4] feat: add language support across configurations and prompts --- .../app/agents/researcher/configuration.py | 2 + .../app/agents/researcher/nodes.py | 4 +- .../app/agents/researcher/prompts.py | 7 +- .../researcher/qna_agent/configuration.py | 1 + .../app/agents/researcher/qna_agent/nodes.py | 8 +-- .../agents/researcher/qna_agent/prompts.py | 18 +++-- .../researcher/sub_section_writer/prompts.py | 18 +++-- surfsense_backend/app/db.py | 2 + surfsense_backend/app/routes/chats_routes.py | 46 ++++++++++++- .../app/routes/llm_config_routes.py | 15 +++- surfsense_backend/app/schemas/llm_config.py | 6 ++ .../tasks/stream_connector_search_results.py | 3 + .../components/onboard/add-provider-step.tsx | 27 +++++++- .../settings/model-config-manager.tsx | 32 +++++++++ surfsense_web/contracts/enums/languages.ts | 69 +++++++++++++++++++ surfsense_web/hooks/use-llm-configs.ts | 2 + 16 files changed, 242 insertions(+), 18 deletions(-) create mode 100644 surfsense_web/contracts/enums/languages.ts diff --git a/surfsense_backend/app/agents/researcher/configuration.py b/surfsense_backend/app/agents/researcher/configuration.py index 3e81a59c0..e8ee856de 100644 --- a/surfsense_backend/app/agents/researcher/configuration.py +++ b/surfsense_backend/app/agents/researcher/configuration.py @@ -37,6 +37,8 @@ class Configuration: search_mode: SearchMode research_mode: ResearchMode document_ids_to_add_in_context: list[int] + language: str | None = None + @classmethod def from_runnable_config( diff --git a/surfsense_backend/app/agents/researcher/nodes.py b/surfsense_backend/app/agents/researcher/nodes.py index 0835fb861..fe869c265 100644 --- a/surfsense_backend/app/agents/researcher/nodes.py +++ b/surfsense_backend/app/agents/researcher/nodes.py @@ -578,6 +578,7 @@ async def write_answer_outline( num_sections = configuration.num_sections user_id = configuration.user_id search_space_id = configuration.search_space_id + language = configuration.language # Get language from configuration writer( { @@ -628,7 +629,7 @@ async def write_answer_outline( # Create messages for the LLM messages = [ - SystemMessage(content=get_answer_outline_system_prompt()), + SystemMessage(content=get_answer_outline_system_prompt(language=language)), HumanMessage(content=human_message_content), ] @@ -2000,6 +2001,7 @@ async def handle_qna_workflow( "relevant_documents": all_documents, # Use combined documents "user_id": configuration.user_id, "search_space_id": configuration.search_space_id, + "language": configuration.language, } } diff --git a/surfsense_backend/app/agents/researcher/prompts.py b/surfsense_backend/app/agents/researcher/prompts.py index 44b218913..b7265602a 100644 --- a/surfsense_backend/app/agents/researcher/prompts.py +++ b/surfsense_backend/app/agents/researcher/prompts.py @@ -1,9 +1,14 @@ import datetime -def get_answer_outline_system_prompt(): +def get_answer_outline_system_prompt(language: str | None = None) -> str: + language_instruction = "" + if language: + language_instruction = f"\n\nIMPORTANT: Please respond in {language} language. All your responses, explanations, and analysis should be written in {language}." + return f""" Today's date: {datetime.datetime.now().strftime("%Y-%m-%d")} +{language_instruction} You are an expert research assistant specializing in structuring information. Your task is to create a detailed and logical research outline based on the user's query. This outline will serve as the blueprint for generating a comprehensive research report. diff --git a/surfsense_backend/app/agents/researcher/qna_agent/configuration.py b/surfsense_backend/app/agents/researcher/qna_agent/configuration.py index 5a4529e0d..ea107a575 100644 --- a/surfsense_backend/app/agents/researcher/qna_agent/configuration.py +++ b/surfsense_backend/app/agents/researcher/qna_agent/configuration.py @@ -20,6 +20,7 @@ class Configuration: ] # Documents provided directly to the agent for answering user_id: str # User identifier search_space_id: int # Search space identifier + language: str | None = None # Language for responses @classmethod def from_runnable_config( diff --git a/surfsense_backend/app/agents/researcher/qna_agent/nodes.py b/surfsense_backend/app/agents/researcher/qna_agent/nodes.py index fd6861efb..20374b706 100644 --- a/surfsense_backend/app/agents/researcher/qna_agent/nodes.py +++ b/surfsense_backend/app/agents/researcher/qna_agent/nodes.py @@ -102,7 +102,7 @@ async def answer_question(state: State, config: RunnableConfig) -> dict[str, Any user_query = configuration.user_query user_id = configuration.user_id search_space_id = configuration.search_space_id - + language = configuration.language # Get user's fast LLM llm = await get_user_fast_llm(state.db_session, user_id, search_space_id) if not llm: @@ -127,7 +127,7 @@ async def answer_question(state: State, config: RunnableConfig) -> dict[str, Any """ # Use initial system prompt for token calculation - initial_system_prompt = get_qna_citation_system_prompt(chat_history_str) + initial_system_prompt = get_qna_citation_system_prompt(chat_history_str, language) base_messages = [ SystemMessage(content=initial_system_prompt), HumanMessage(content=base_human_message_template), @@ -146,9 +146,9 @@ async def answer_question(state: State, config: RunnableConfig) -> dict[str, Any # Choose system prompt based on final document availability system_prompt = ( - get_qna_citation_system_prompt(chat_history_str) + get_qna_citation_system_prompt(chat_history_str, language) if has_documents - else get_qna_no_documents_system_prompt(chat_history_str) + else get_qna_no_documents_system_prompt(chat_history_str, language) ) # Generate documents section diff --git a/surfsense_backend/app/agents/researcher/qna_agent/prompts.py b/surfsense_backend/app/agents/researcher/qna_agent/prompts.py index 212788804..deb5dd59f 100644 --- a/surfsense_backend/app/agents/researcher/qna_agent/prompts.py +++ b/surfsense_backend/app/agents/researcher/qna_agent/prompts.py @@ -1,7 +1,7 @@ import datetime -def get_qna_citation_system_prompt(chat_history: str | None = None): +def get_qna_citation_system_prompt(chat_history: str | None = None, language: str | None = None): chat_history_section = ( f""" @@ -15,10 +15,15 @@ NO CHAT HISTORY PROVIDED """ ) + + # Add language instruction if specified + language_instruction = "" + if language: + language_instruction = f"\n\nIMPORTANT: Please respond in {language} language. All your responses, explanations, and analysis should be written in {language}." return f""" Today's date: {datetime.datetime.now().strftime("%Y-%m-%d")} -You are SurfSense, an advanced AI research assistant that provides detailed, well-researched answers to user questions by synthesizing information from multiple personal knowledge sources. +You are SurfSense, an advanced AI research assistant that provides detailed, well-researched answers to user questions by synthesizing information from multiple personal knowledge sources.{language_instruction} {chat_history_section} - EXTENSION: "Web content saved via SurfSense browser extension" (personal browsing history) @@ -149,7 +154,7 @@ Make sure your response: """ -def get_qna_no_documents_system_prompt(chat_history: str | None = None): +def get_qna_no_documents_system_prompt(chat_history: str | None = None, language: str | None = None): chat_history_section = ( f""" @@ -163,10 +168,15 @@ NO CHAT HISTORY PROVIDED """ ) + + # Add language instruction if specified + language_instruction = "" + if language: + language_instruction = f"\n\nIMPORTANT: Please respond in {language} language. All your responses, explanations, and analysis should be written in {language}." return f""" Today's date: {datetime.datetime.now().strftime("%Y-%m-%d")} -You are SurfSense, an advanced AI research assistant that provides helpful, detailed answers to user questions in a conversational manner. +You are SurfSense, an advanced AI research assistant that provides helpful, detailed answers to user questions in a conversational manner.{language_instruction} {chat_history_section} The user has asked a question but there are no specific documents from their personal knowledge base available to answer it. You should provide a helpful response based on: diff --git a/surfsense_backend/app/agents/researcher/sub_section_writer/prompts.py b/surfsense_backend/app/agents/researcher/sub_section_writer/prompts.py index c3d487671..3954d47e5 100644 --- a/surfsense_backend/app/agents/researcher/sub_section_writer/prompts.py +++ b/surfsense_backend/app/agents/researcher/sub_section_writer/prompts.py @@ -1,7 +1,7 @@ import datetime -def get_citation_system_prompt(chat_history: str | None = None): +def get_citation_system_prompt(chat_history: str | None = None, language: str | None = None): chat_history_section = ( f""" @@ -15,10 +15,15 @@ NO CHAT HISTORY PROVIDED """ ) + + # Add language instruction if specified + language_instruction = "" + if language: + language_instruction = f"\n\nIMPORTANT: Please respond in {language} language. All your responses, explanations, and analysis should be written in {language}." return f""" Today's date: {datetime.datetime.now().strftime("%Y-%m-%d")} -You are SurfSense, an advanced AI research assistant that synthesizes information from multiple knowledge sources to provide comprehensive, well-cited answers to user queries. +You are SurfSense, an advanced AI research assistant that synthesizes information from multiple knowledge sources to provide comprehensive, well-cited answers to user queries.{language_instruction} {chat_history_section} - EXTENSION: "Web content saved via SurfSense browser extension" (personal browsing history) @@ -156,7 +161,7 @@ Make sure your response: """ -def get_no_documents_system_prompt(chat_history: str | None = None): +def get_no_documents_system_prompt(chat_history: str | None = None, language: str | None = None): chat_history_section = ( f""" @@ -170,10 +175,15 @@ NO CHAT HISTORY PROVIDED """ ) + + # Add language instruction if specified + language_instruction = "" + if language: + language_instruction = f"\n\nIMPORTANT: Please respond in {language} language. All your responses, explanations, and analysis should be written in {language}." return f""" Today's date: {datetime.datetime.now().strftime("%Y-%m-%d")} -You are SurfSense, an advanced AI research assistant that helps users create well-structured content for their documents and research. +You are SurfSense, an advanced AI research assistant that helps users create well-structured content for their documents and research.{language_instruction} {chat_history_section} You are writing content for a specific sub-section of a document. No specific documents from the user's personal knowledge base are available, so you should create content based on: diff --git a/surfsense_backend/app/db.py b/surfsense_backend/app/db.py index eb33145cf..e476ed8e5 100644 --- a/surfsense_backend/app/db.py +++ b/surfsense_backend/app/db.py @@ -296,6 +296,8 @@ class LLMConfig(BaseModel, TimestampMixin): api_key = Column(String, nullable=False) api_base = Column(String(500), nullable=True) + language = Column(String(50), nullable=True, default="English") + # For any other parameters that litellm supports litellm_params = Column(JSON, nullable=True, default={}) diff --git a/surfsense_backend/app/routes/chats_routes.py b/surfsense_backend/app/routes/chats_routes.py index e4d02686f..d30bf5451 100644 --- a/surfsense_backend/app/routes/chats_routes.py +++ b/surfsense_backend/app/routes/chats_routes.py @@ -4,8 +4,10 @@ from langchain.schema import AIMessage, HumanMessage from sqlalchemy.exc import IntegrityError, OperationalError from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.future import select +from sqlalchemy.orm import selectinload -from app.db import Chat, SearchSpace, User, get_async_session + +from app.db import Chat, SearchSpace, User, UserSearchSpacePreference, get_async_session from app.schemas import ( AISDKChatRequest, ChatCreate, @@ -53,21 +55,60 @@ async def handle_chat_data( request_data.get("document_ids_to_add_in_context") ) search_mode_str = validate_search_mode(request_data.get("search_mode")) + # print("RESQUEST DATA:", request_data) + # print("SELECTED CONNECTORS:", selected_connectors) # Check if the search space belongs to the current user try: await check_ownership(session, SearchSpace, search_space_id, user) + language_result = await session.execute( + select(UserSearchSpacePreference) + .options( + selectinload(UserSearchSpacePreference.search_space).selectinload(SearchSpace.llm_configs), + selectinload(UserSearchSpacePreference.long_context_llm), + selectinload(UserSearchSpacePreference.fast_llm), + selectinload(UserSearchSpacePreference.strategic_llm) + ) + .filter( + UserSearchSpacePreference.search_space_id == search_space_id, + UserSearchSpacePreference.user_id == user.id + ) + ) + user_preference = language_result.scalars().first() + print("UserSearchSpacePreference:", user_preference) + + language = None + if user_preference and user_preference.search_space and user_preference.search_space.llm_configs: + llm_configs = user_preference.search_space.llm_configs + # print(f"Found {len(llm_configs)} LLM Configs") + # for i, config in enumerate(llm_configs): + # print(f" Config {i+1}: name={config.name}, provider={config.provider}, language={getattr(config, 'language', None)}") + + + for preferred_llm in [user_preference.fast_llm, user_preference.long_context_llm, user_preference.strategic_llm]: + if preferred_llm and getattr(preferred_llm, 'language', None): + language = preferred_llm.language + # print(f"Using language from preferred LLM: {preferred_llm.name} -> {language}") + break + + # no preferred llM has language use first available LLM config + if not language: + first_llm_config = llm_configs[0] + language = getattr(first_llm_config, 'language', None) + # print(f"Using language from first LLM config: {first_llm_config.name} -> {language}") + except HTTPException: raise HTTPException( status_code=403, detail="You don't have access to this search space" ) from None - + # print("Language selected:", language) langchain_chat_history = [] for message in messages[:-1]: if message["role"] == "user": langchain_chat_history.append(HumanMessage(content=message["content"])) elif message["role"] == "assistant": langchain_chat_history.append(AIMessage(content=message["content"])) + response = StreamingResponse( stream_connector_search_results( @@ -80,6 +121,7 @@ async def handle_chat_data( langchain_chat_history, search_mode_str, document_ids_to_add_in_context, + language, ) ) diff --git a/surfsense_backend/app/routes/llm_config_routes.py b/surfsense_backend/app/routes/llm_config_routes.py index 63d540d2c..896f7be41 100644 --- a/surfsense_backend/app/routes/llm_config_routes.py +++ b/surfsense_backend/app/routes/llm_config_routes.py @@ -299,7 +299,10 @@ async def update_user_llm_preferences( # Validate that all provided LLM config IDs belong to the search space update_data = preferences.model_dump(exclude_unset=True) - + + # Store language from configs to validate consistency + languages = set() + for _key, llm_config_id in update_data.items(): if llm_config_id is not None: # Verify the LLM config belongs to the search space @@ -315,6 +318,16 @@ async def update_user_llm_preferences( status_code=404, detail=f"LLM configuration {llm_config_id} not found in this search space", ) + + # Collect language for consistency check + languages.add(llm_config.language) + + # Check if all selected LLM configs have the same language + if len(languages) > 1: + raise HTTPException( + status_code=400, + detail="All selected LLM configurations must have the same language setting", + ) # Update user preferences for key, value in update_data.items(): diff --git a/surfsense_backend/app/schemas/llm_config.py b/surfsense_backend/app/schemas/llm_config.py index 8beb65347..285c15665 100644 --- a/surfsense_backend/app/schemas/llm_config.py +++ b/surfsense_backend/app/schemas/llm_config.py @@ -26,6 +26,9 @@ class LLMConfigBase(BaseModel): litellm_params: dict[str, Any] | None = Field( default=None, description="Additional LiteLLM parameters" ) + language: str | None = Field( + default="English", max_length=50, description="Language for the LLM" + ) class LLMConfigCreate(LLMConfigBase): @@ -49,6 +52,9 @@ class LLMConfigUpdate(BaseModel): api_base: str | None = Field( None, max_length=500, description="Optional API base URL" ) + language: str | None = Field( + None, max_length=50, description="Language for the LLM" + ) litellm_params: dict[str, Any] | None = Field( None, description="Additional LiteLLM parameters" ) diff --git a/surfsense_backend/app/tasks/stream_connector_search_results.py b/surfsense_backend/app/tasks/stream_connector_search_results.py index ead6a89e7..dd1ae4ce5 100644 --- a/surfsense_backend/app/tasks/stream_connector_search_results.py +++ b/surfsense_backend/app/tasks/stream_connector_search_results.py @@ -20,6 +20,7 @@ async def stream_connector_search_results( langchain_chat_history: list[Any], search_mode_str: str, document_ids_to_add_in_context: list[int], + language: str | None = None, ) -> AsyncGenerator[str, None]: """ Stream connector search results to the client @@ -66,8 +67,10 @@ async def stream_connector_search_results( "search_mode": search_mode, "research_mode": research_mode, "document_ids_to_add_in_context": document_ids_to_add_in_context, + "language": language, # Add language to the configuration } } + # print(f"Researcher configuration: {config['configurable']}") # Debug print # Initialize state with database session and streaming service initial_state = State( db_session=session, diff --git a/surfsense_web/components/onboard/add-provider-step.tsx b/surfsense_web/components/onboard/add-provider-step.tsx index 9b70c8d7f..6517fe0a1 100644 --- a/surfsense_web/components/onboard/add-provider-step.tsx +++ b/surfsense_web/components/onboard/add-provider-step.tsx @@ -18,6 +18,7 @@ import { SelectValue, } from "@/components/ui/select"; import { LLM_PROVIDERS } from "@/contracts/enums/llm-providers"; +import { LANGUAGES } from "@/contracts/enums/languages"; import { type CreateLLMConfig, useLLMConfigs } from "@/hooks/use-llm-configs"; import InferenceParamsEditor from "../inference-params-editor"; @@ -42,6 +43,7 @@ export function AddProviderStep({ model_name: "", api_key: "", api_base: "", + language: "English", litellm_params: {}, search_space_id: searchSpaceId, }); @@ -70,6 +72,7 @@ export function AddProviderStep({ model_name: "", api_key: "", api_base: "", + language: "English", litellm_params: {}, search_space_id: searchSpaceId, }); @@ -119,6 +122,7 @@ export function AddProviderStep({

Model: {config.model_name} + {config.language && ` • Language: ${config.language}`} {config.api_base && ` • Base: ${config.api_base}`}

@@ -169,7 +173,7 @@ export function AddProviderStep({
-
+
+ + {/* language */} +
+ + +
+
{formData.provider === "CUSTOM" && ( diff --git a/surfsense_web/components/settings/model-config-manager.tsx b/surfsense_web/components/settings/model-config-manager.tsx index 7384337b6..a236f1965 100644 --- a/surfsense_web/components/settings/model-config-manager.tsx +++ b/surfsense_web/components/settings/model-config-manager.tsx @@ -38,6 +38,7 @@ import { SelectValue, } from "@/components/ui/select"; import { LLM_PROVIDERS } from "@/contracts/enums/llm-providers"; +import { LANGUAGES } from "@/contracts/enums/languages"; import { type CreateLLMConfig, type LLMConfig, useLLMConfigs } from "@/hooks/use-llm-configs"; import InferenceParamsEditor from "../inference-params-editor"; @@ -65,6 +66,7 @@ export function ModelConfigManager({ searchSpaceId }: ModelConfigManagerProps) { model_name: "", api_key: "", api_base: "", + language: "English", litellm_params: {}, search_space_id: searchSpaceId, }); @@ -80,6 +82,7 @@ export function ModelConfigManager({ searchSpaceId }: ModelConfigManagerProps) { model_name: editingConfig.model_name, api_key: editingConfig.api_key, api_base: editingConfig.api_base || "", + language: editingConfig.language || "Enlgish", litellm_params: editingConfig.litellm_params || {}, search_space_id: searchSpaceId, }); @@ -118,6 +121,7 @@ export function ModelConfigManager({ searchSpaceId }: ModelConfigManagerProps) { model_name: "", api_key: "", api_base: "", + language: "English", litellm_params: {}, search_space_id: searchSpaceId, }); @@ -323,6 +327,13 @@ export function ModelConfigManager({ searchSpaceId }: ModelConfigManagerProps) {

{config.model_name}

+ {config.language && ( +
+ + {config.language} + +
+ )}
@@ -432,6 +443,7 @@ export function ModelConfigManager({ searchSpaceId }: ModelConfigManagerProps) { model_name: "", api_key: "", api_base: "", + language: "", litellm_params: {}, search_space_id: searchSpaceId, }); @@ -524,6 +536,25 @@ export function ModelConfigManager({ searchSpaceId }: ModelConfigManagerProps) { )} +
+ + +
+
; created_at: string; search_space_id: number; @@ -31,6 +32,7 @@ export interface CreateLLMConfig { model_name: string; api_key: string; api_base?: string; + language?: string; litellm_params?: Record; search_space_id: number; } From 807f4055f93398db3642202d80beb691de5d80de Mon Sep 17 00:00:00 2001 From: Tarun Date: Sun, 12 Oct 2025 13:44:45 +0530 Subject: [PATCH 2/4] Spelling mistake --- surfsense_web/components/settings/model-config-manager.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/surfsense_web/components/settings/model-config-manager.tsx b/surfsense_web/components/settings/model-config-manager.tsx index a236f1965..5719b01d1 100644 --- a/surfsense_web/components/settings/model-config-manager.tsx +++ b/surfsense_web/components/settings/model-config-manager.tsx @@ -82,7 +82,7 @@ export function ModelConfigManager({ searchSpaceId }: ModelConfigManagerProps) { model_name: editingConfig.model_name, api_key: editingConfig.api_key, api_base: editingConfig.api_base || "", - language: editingConfig.language || "Enlgish", + language: editingConfig.language || "English", litellm_params: editingConfig.litellm_params || {}, search_space_id: searchSpaceId, }); From 120d60465e33bdb6f51195a3d9f23a47c5091ef6 Mon Sep 17 00:00:00 2001 From: Tarun Date: Sun, 12 Oct 2025 23:40:46 +0530 Subject: [PATCH 3/4] refactor: streamline language instruction handling across prompts --- .../app/agents/researcher/prompts.py | 9 +++++---- .../app/agents/researcher/qna_agent/prompts.py | 11 +++-------- .../researcher/sub_section_writer/prompts.py | 11 +++-------- surfsense_backend/app/routes/chats_routes.py | 18 +++++++----------- 4 files changed, 18 insertions(+), 31 deletions(-) diff --git a/surfsense_backend/app/agents/researcher/prompts.py b/surfsense_backend/app/agents/researcher/prompts.py index b7265602a..868a78851 100644 --- a/surfsense_backend/app/agents/researcher/prompts.py +++ b/surfsense_backend/app/agents/researcher/prompts.py @@ -1,10 +1,11 @@ import datetime - -def get_answer_outline_system_prompt(language: str | None = None) -> str: - language_instruction = "" +def _build_language_instruction(language: str | None = None): if language: - language_instruction = f"\n\nIMPORTANT: Please respond in {language} language. All your responses, explanations, and analysis should be written in {language}." + return f"\n\nIMPORTANT: Please respond in {language} language. All your responses, explanations, and analysis should be written in {language}." + return "" +def get_answer_outline_system_prompt(language: str | None = None) -> str: + language_instruction = _build_language_instruction(language) return f""" Today's date: {datetime.datetime.now().strftime("%Y-%m-%d")} diff --git a/surfsense_backend/app/agents/researcher/qna_agent/prompts.py b/surfsense_backend/app/agents/researcher/qna_agent/prompts.py index deb5dd59f..de17ec933 100644 --- a/surfsense_backend/app/agents/researcher/qna_agent/prompts.py +++ b/surfsense_backend/app/agents/researcher/qna_agent/prompts.py @@ -1,5 +1,5 @@ import datetime - +from ..prompts import _build_language_instruction def get_qna_citation_system_prompt(chat_history: str | None = None, language: str | None = None): chat_history_section = ( @@ -17,10 +17,7 @@ NO CHAT HISTORY PROVIDED ) # Add language instruction if specified - language_instruction = "" - if language: - language_instruction = f"\n\nIMPORTANT: Please respond in {language} language. All your responses, explanations, and analysis should be written in {language}." - + language_instruction = _build_language_instruction(language) return f""" Today's date: {datetime.datetime.now().strftime("%Y-%m-%d")} You are SurfSense, an advanced AI research assistant that provides detailed, well-researched answers to user questions by synthesizing information from multiple personal knowledge sources.{language_instruction} @@ -170,9 +167,7 @@ NO CHAT HISTORY PROVIDED ) # Add language instruction if specified - language_instruction = "" - if language: - language_instruction = f"\n\nIMPORTANT: Please respond in {language} language. All your responses, explanations, and analysis should be written in {language}." + language_instruction = _build_language_instruction(language) return f""" Today's date: {datetime.datetime.now().strftime("%Y-%m-%d")} diff --git a/surfsense_backend/app/agents/researcher/sub_section_writer/prompts.py b/surfsense_backend/app/agents/researcher/sub_section_writer/prompts.py index 3954d47e5..a6a561bf1 100644 --- a/surfsense_backend/app/agents/researcher/sub_section_writer/prompts.py +++ b/surfsense_backend/app/agents/researcher/sub_section_writer/prompts.py @@ -1,6 +1,5 @@ import datetime - - +from ..prompts import _build_language_instruction def get_citation_system_prompt(chat_history: str | None = None, language: str | None = None): chat_history_section = ( f""" @@ -17,9 +16,7 @@ NO CHAT HISTORY PROVIDED ) # Add language instruction if specified - language_instruction = "" - if language: - language_instruction = f"\n\nIMPORTANT: Please respond in {language} language. All your responses, explanations, and analysis should be written in {language}." + language_instruction = _build_language_instruction(language) return f""" Today's date: {datetime.datetime.now().strftime("%Y-%m-%d")} @@ -177,9 +174,7 @@ NO CHAT HISTORY PROVIDED ) # Add language instruction if specified - language_instruction = "" - if language: - language_instruction = f"\n\nIMPORTANT: Please respond in {language} language. All your responses, explanations, and analysis should be written in {language}." + language_instruction = _build_language_instruction(language) return f""" Today's date: {datetime.datetime.now().strftime("%Y-%m-%d")} diff --git a/surfsense_backend/app/routes/chats_routes.py b/surfsense_backend/app/routes/chats_routes.py index d30bf5451..0874d2611 100644 --- a/surfsense_backend/app/routes/chats_routes.py +++ b/surfsense_backend/app/routes/chats_routes.py @@ -75,33 +75,29 @@ async def handle_chat_data( ) ) user_preference = language_result.scalars().first() - print("UserSearchSpacePreference:", user_preference) + # print("UserSearchSpacePreference:", user_preference) language = None if user_preference and user_preference.search_space and user_preference.search_space.llm_configs: llm_configs = user_preference.search_space.llm_configs - # print(f"Found {len(llm_configs)} LLM Configs") - # for i, config in enumerate(llm_configs): - # print(f" Config {i+1}: name={config.name}, provider={config.provider}, language={getattr(config, 'language', None)}") for preferred_llm in [user_preference.fast_llm, user_preference.long_context_llm, user_preference.strategic_llm]: if preferred_llm and getattr(preferred_llm, 'language', None): language = preferred_llm.language - # print(f"Using language from preferred LLM: {preferred_llm.name} -> {language}") break - # no preferred llM has language use first available LLM config - if not language: - first_llm_config = llm_configs[0] - language = getattr(first_llm_config, 'language', None) - # print(f"Using language from first LLM config: {first_llm_config.name} -> {language}") + + if not language: + first_llm_config = llm_configs[0] + language = getattr(first_llm_config, 'language', None) + except HTTPException: raise HTTPException( status_code=403, detail="You don't have access to this search space" ) from None - # print("Language selected:", language) + langchain_chat_history = [] for message in messages[:-1]: if message["role"] == "user": From a3f50ebc4dbad1524c01c6b8022b986d193b443f Mon Sep 17 00:00:00 2001 From: "DESKTOP-RTLN3BA\\$punk" Date: Sun, 12 Oct 2025 20:15:27 -0700 Subject: [PATCH 4/4] feat: added missed migration --- ...associate_connectors_with_search_spaces.py | 1 - .../versions/24_fix_null_chat_types.py | 1 - ...25_migrate_llm_configs_to_search_spaces.py | 1 - .../26_add_language_column_to_llm_configs.py | 69 +++++++++++++++++++ .../app/agents/researcher/configuration.py | 3 +- .../app/agents/researcher/prompts.py | 3 + .../app/agents/researcher/qna_agent/nodes.py | 6 +- .../agents/researcher/qna_agent/prompts.py | 14 ++-- .../researcher/sub_section_writer/prompts.py | 15 ++-- surfsense_backend/app/routes/chats_routes.py | 43 +++++++----- .../app/routes/llm_config_routes.py | 8 +-- 11 files changed, 126 insertions(+), 38 deletions(-) create mode 100644 surfsense_backend/alembic/versions/26_add_language_column_to_llm_configs.py diff --git a/surfsense_backend/alembic/versions/23_associate_connectors_with_search_spaces.py b/surfsense_backend/alembic/versions/23_associate_connectors_with_search_spaces.py index 20e9d7840..a693b9ec5 100644 --- a/surfsense_backend/alembic/versions/23_associate_connectors_with_search_spaces.py +++ b/surfsense_backend/alembic/versions/23_associate_connectors_with_search_spaces.py @@ -2,7 +2,6 @@ Revision ID: '23' Revises: '22' -Create Date: 2025-01-10 12:00:00.000000 """ diff --git a/surfsense_backend/alembic/versions/24_fix_null_chat_types.py b/surfsense_backend/alembic/versions/24_fix_null_chat_types.py index 35313d27b..e0d371f1e 100644 --- a/surfsense_backend/alembic/versions/24_fix_null_chat_types.py +++ b/surfsense_backend/alembic/versions/24_fix_null_chat_types.py @@ -2,7 +2,6 @@ Revision ID: 24 Revises: 23 -Create Date: 2025-01-10 14:00:00.000000 """ diff --git a/surfsense_backend/alembic/versions/25_migrate_llm_configs_to_search_spaces.py b/surfsense_backend/alembic/versions/25_migrate_llm_configs_to_search_spaces.py index 116a3c687..c9966599c 100644 --- a/surfsense_backend/alembic/versions/25_migrate_llm_configs_to_search_spaces.py +++ b/surfsense_backend/alembic/versions/25_migrate_llm_configs_to_search_spaces.py @@ -2,7 +2,6 @@ Revision ID: 25 Revises: 24 -Create Date: 2025-01-10 14:00:00.000000 Changes: 1. Migrate llm_configs from user association to search_space association diff --git a/surfsense_backend/alembic/versions/26_add_language_column_to_llm_configs.py b/surfsense_backend/alembic/versions/26_add_language_column_to_llm_configs.py new file mode 100644 index 000000000..e5cdc37d7 --- /dev/null +++ b/surfsense_backend/alembic/versions/26_add_language_column_to_llm_configs.py @@ -0,0 +1,69 @@ +"""Add language column to llm_configs + +Revision ID: 26 +Revises: 25 + +Changes: +1. Add language column to llm_configs table with default value of 'English' +""" + +from collections.abc import Sequence + +import sqlalchemy as sa + +from alembic import op + +# revision identifiers, used by Alembic. +revision: str = "26" +down_revision: str | None = "25" +branch_labels: str | Sequence[str] | None = None +depends_on: str | Sequence[str] | None = None + + +def upgrade() -> None: + """Add language column to llm_configs table.""" + + from sqlalchemy import inspect + + conn = op.get_bind() + inspector = inspect(conn) + + # Get existing columns + llm_config_columns = [col["name"] for col in inspector.get_columns("llm_configs")] + + # Add language column if it doesn't exist + if "language" not in llm_config_columns: + op.add_column( + "llm_configs", + sa.Column( + "language", + sa.String(length=50), + nullable=True, + server_default="English", + ), + ) + + # Update existing rows to have 'English' as default + op.execute( + """ + UPDATE llm_configs + SET language = 'English' + WHERE language IS NULL + """ + ) + + +def downgrade() -> None: + """Remove language column from llm_configs table.""" + + from sqlalchemy import inspect + + conn = op.get_bind() + inspector = inspect(conn) + + # Get existing columns + llm_config_columns = [col["name"] for col in inspector.get_columns("llm_configs")] + + # Drop language column if it exists + if "language" in llm_config_columns: + op.drop_column("llm_configs", "language") diff --git a/surfsense_backend/app/agents/researcher/configuration.py b/surfsense_backend/app/agents/researcher/configuration.py index e8ee856de..24d8c819e 100644 --- a/surfsense_backend/app/agents/researcher/configuration.py +++ b/surfsense_backend/app/agents/researcher/configuration.py @@ -37,8 +37,7 @@ class Configuration: search_mode: SearchMode research_mode: ResearchMode document_ids_to_add_in_context: list[int] - language: str | None = None - + language: str | None = None @classmethod def from_runnable_config( diff --git a/surfsense_backend/app/agents/researcher/prompts.py b/surfsense_backend/app/agents/researcher/prompts.py index 868a78851..825772a24 100644 --- a/surfsense_backend/app/agents/researcher/prompts.py +++ b/surfsense_backend/app/agents/researcher/prompts.py @@ -1,9 +1,12 @@ import datetime + def _build_language_instruction(language: str | None = None): if language: return f"\n\nIMPORTANT: Please respond in {language} language. All your responses, explanations, and analysis should be written in {language}." return "" + + def get_answer_outline_system_prompt(language: str | None = None) -> str: language_instruction = _build_language_instruction(language) diff --git a/surfsense_backend/app/agents/researcher/qna_agent/nodes.py b/surfsense_backend/app/agents/researcher/qna_agent/nodes.py index 20374b706..c4e79d685 100644 --- a/surfsense_backend/app/agents/researcher/qna_agent/nodes.py +++ b/surfsense_backend/app/agents/researcher/qna_agent/nodes.py @@ -102,7 +102,7 @@ async def answer_question(state: State, config: RunnableConfig) -> dict[str, Any user_query = configuration.user_query user_id = configuration.user_id search_space_id = configuration.search_space_id - language = configuration.language + language = configuration.language # Get user's fast LLM llm = await get_user_fast_llm(state.db_session, user_id, search_space_id) if not llm: @@ -127,7 +127,9 @@ async def answer_question(state: State, config: RunnableConfig) -> dict[str, Any """ # Use initial system prompt for token calculation - initial_system_prompt = get_qna_citation_system_prompt(chat_history_str, language) + initial_system_prompt = get_qna_citation_system_prompt( + chat_history_str, language + ) base_messages = [ SystemMessage(content=initial_system_prompt), HumanMessage(content=base_human_message_template), diff --git a/surfsense_backend/app/agents/researcher/qna_agent/prompts.py b/surfsense_backend/app/agents/researcher/qna_agent/prompts.py index de17ec933..9c35f90cc 100644 --- a/surfsense_backend/app/agents/researcher/qna_agent/prompts.py +++ b/surfsense_backend/app/agents/researcher/qna_agent/prompts.py @@ -1,7 +1,11 @@ import datetime + from ..prompts import _build_language_instruction -def get_qna_citation_system_prompt(chat_history: str | None = None, language: str | None = None): + +def get_qna_citation_system_prompt( + chat_history: str | None = None, language: str | None = None +): chat_history_section = ( f""" @@ -15,7 +19,7 @@ NO CHAT HISTORY PROVIDED """ ) - + # Add language instruction if specified language_instruction = _build_language_instruction(language) return f""" @@ -151,7 +155,9 @@ Make sure your response: """ -def get_qna_no_documents_system_prompt(chat_history: str | None = None, language: str | None = None): +def get_qna_no_documents_system_prompt( + chat_history: str | None = None, language: str | None = None +): chat_history_section = ( f""" @@ -165,7 +171,7 @@ NO CHAT HISTORY PROVIDED """ ) - + # Add language instruction if specified language_instruction = _build_language_instruction(language) diff --git a/surfsense_backend/app/agents/researcher/sub_section_writer/prompts.py b/surfsense_backend/app/agents/researcher/sub_section_writer/prompts.py index a6a561bf1..3c34eb474 100644 --- a/surfsense_backend/app/agents/researcher/sub_section_writer/prompts.py +++ b/surfsense_backend/app/agents/researcher/sub_section_writer/prompts.py @@ -1,6 +1,11 @@ import datetime + from ..prompts import _build_language_instruction -def get_citation_system_prompt(chat_history: str | None = None, language: str | None = None): + + +def get_citation_system_prompt( + chat_history: str | None = None, language: str | None = None +): chat_history_section = ( f""" @@ -14,7 +19,7 @@ NO CHAT HISTORY PROVIDED """ ) - + # Add language instruction if specified language_instruction = _build_language_instruction(language) @@ -158,7 +163,9 @@ Make sure your response: """ -def get_no_documents_system_prompt(chat_history: str | None = None, language: str | None = None): +def get_no_documents_system_prompt( + chat_history: str | None = None, language: str | None = None +): chat_history_section = ( f""" @@ -172,7 +179,7 @@ NO CHAT HISTORY PROVIDED """ ) - + # Add language instruction if specified language_instruction = _build_language_instruction(language) diff --git a/surfsense_backend/app/routes/chats_routes.py b/surfsense_backend/app/routes/chats_routes.py index 0874d2611..e003dc260 100644 --- a/surfsense_backend/app/routes/chats_routes.py +++ b/surfsense_backend/app/routes/chats_routes.py @@ -6,7 +6,6 @@ from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.future import select from sqlalchemy.orm import selectinload - from app.db import Chat, SearchSpace, User, UserSearchSpacePreference, get_async_session from app.schemas import ( AISDKChatRequest, @@ -64,47 +63,53 @@ async def handle_chat_data( language_result = await session.execute( select(UserSearchSpacePreference) .options( - selectinload(UserSearchSpacePreference.search_space).selectinload(SearchSpace.llm_configs), + selectinload(UserSearchSpacePreference.search_space).selectinload( + SearchSpace.llm_configs + ), selectinload(UserSearchSpacePreference.long_context_llm), selectinload(UserSearchSpacePreference.fast_llm), - selectinload(UserSearchSpacePreference.strategic_llm) + selectinload(UserSearchSpacePreference.strategic_llm), ) .filter( - UserSearchSpacePreference.search_space_id == search_space_id, - UserSearchSpacePreference.user_id == user.id + UserSearchSpacePreference.search_space_id == search_space_id, + UserSearchSpacePreference.user_id == user.id, ) ) user_preference = language_result.scalars().first() # print("UserSearchSpacePreference:", user_preference) - + language = None - if user_preference and user_preference.search_space and user_preference.search_space.llm_configs: + if ( + user_preference + and user_preference.search_space + and user_preference.search_space.llm_configs + ): llm_configs = user_preference.search_space.llm_configs - - - for preferred_llm in [user_preference.fast_llm, user_preference.long_context_llm, user_preference.strategic_llm]: - if preferred_llm and getattr(preferred_llm, 'language', None): + + for preferred_llm in [ + user_preference.fast_llm, + user_preference.long_context_llm, + user_preference.strategic_llm, + ]: + if preferred_llm and getattr(preferred_llm, "language", None): language = preferred_llm.language break - - + if not language: first_llm_config = llm_configs[0] - language = getattr(first_llm_config, 'language', None) - - + language = getattr(first_llm_config, "language", None) + except HTTPException: raise HTTPException( status_code=403, detail="You don't have access to this search space" ) from None - + langchain_chat_history = [] for message in messages[:-1]: if message["role"] == "user": langchain_chat_history.append(HumanMessage(content=message["content"])) elif message["role"] == "assistant": langchain_chat_history.append(AIMessage(content=message["content"])) - response = StreamingResponse( stream_connector_search_results( @@ -117,7 +122,7 @@ async def handle_chat_data( langchain_chat_history, search_mode_str, document_ids_to_add_in_context, - language, + language, ) ) diff --git a/surfsense_backend/app/routes/llm_config_routes.py b/surfsense_backend/app/routes/llm_config_routes.py index 896f7be41..ec8ea5846 100644 --- a/surfsense_backend/app/routes/llm_config_routes.py +++ b/surfsense_backend/app/routes/llm_config_routes.py @@ -299,10 +299,10 @@ async def update_user_llm_preferences( # Validate that all provided LLM config IDs belong to the search space update_data = preferences.model_dump(exclude_unset=True) - + # Store language from configs to validate consistency languages = set() - + for _key, llm_config_id in update_data.items(): if llm_config_id is not None: # Verify the LLM config belongs to the search space @@ -318,10 +318,10 @@ async def update_user_llm_preferences( status_code=404, detail=f"LLM configuration {llm_config_id} not found in this search space", ) - + # Collect language for consistency check languages.add(llm_config.language) - + # Check if all selected LLM configs have the same language if len(languages) > 1: raise HTTPException(