diff --git a/surfsense_backend/alembic/versions/23_associate_connectors_with_search_spaces.py b/surfsense_backend/alembic/versions/23_associate_connectors_with_search_spaces.py index 20e9d7840..a693b9ec5 100644 --- a/surfsense_backend/alembic/versions/23_associate_connectors_with_search_spaces.py +++ b/surfsense_backend/alembic/versions/23_associate_connectors_with_search_spaces.py @@ -2,7 +2,6 @@ Revision ID: '23' Revises: '22' -Create Date: 2025-01-10 12:00:00.000000 """ diff --git a/surfsense_backend/alembic/versions/24_fix_null_chat_types.py b/surfsense_backend/alembic/versions/24_fix_null_chat_types.py index 35313d27b..e0d371f1e 100644 --- a/surfsense_backend/alembic/versions/24_fix_null_chat_types.py +++ b/surfsense_backend/alembic/versions/24_fix_null_chat_types.py @@ -2,7 +2,6 @@ Revision ID: 24 Revises: 23 -Create Date: 2025-01-10 14:00:00.000000 """ diff --git a/surfsense_backend/alembic/versions/25_migrate_llm_configs_to_search_spaces.py b/surfsense_backend/alembic/versions/25_migrate_llm_configs_to_search_spaces.py index 116a3c687..c9966599c 100644 --- a/surfsense_backend/alembic/versions/25_migrate_llm_configs_to_search_spaces.py +++ b/surfsense_backend/alembic/versions/25_migrate_llm_configs_to_search_spaces.py @@ -2,7 +2,6 @@ Revision ID: 25 Revises: 24 -Create Date: 2025-01-10 14:00:00.000000 Changes: 1. Migrate llm_configs from user association to search_space association diff --git a/surfsense_backend/alembic/versions/26_add_language_column_to_llm_configs.py b/surfsense_backend/alembic/versions/26_add_language_column_to_llm_configs.py new file mode 100644 index 000000000..e5cdc37d7 --- /dev/null +++ b/surfsense_backend/alembic/versions/26_add_language_column_to_llm_configs.py @@ -0,0 +1,69 @@ +"""Add language column to llm_configs + +Revision ID: 26 +Revises: 25 + +Changes: +1. 
from collections.abc import Sequence

import sqlalchemy as sa

from alembic import op

# revision identifiers, used by Alembic.
revision: str = "26"
down_revision: str | None = "25"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None


def _has_column(table_name: str, column_name: str) -> bool:
    """Return True when ``table_name`` currently has a column ``column_name``.

    The check inspects the live connection Alembic is bound to, so it
    reflects the real schema rather than what earlier revisions declared.
    """
    inspector = sa.inspect(op.get_bind())
    return any(
        column["name"] == column_name
        for column in inspector.get_columns(table_name)
    )


def upgrade() -> None:
    """Add language column to llm_configs table.

    The column is a nullable ``String(50)`` with a server default of
    ``'English'``. Creation is skipped when the column already exists, so
    the migration can be re-run against a partially migrated database.
    Afterwards every row whose ``language`` is still NULL is set to
    ``'English'``.
    """
    if not _has_column("llm_configs", "language"):
        op.add_column(
            "llm_configs",
            sa.Column(
                "language",
                sa.String(length=50),
                nullable=True,
                server_default="English",
            ),
        )

    # Update existing rows to have 'English' as default.
    # NOTE(review): the flattened patch does not show whether this statement
    # was nested under the guard above. It only touches NULL rows and is
    # idempotent, so it runs unconditionally here; that way a column that
    # already existed before this revision is backfilled as well.
    op.execute(
        """
        UPDATE llm_configs
        SET language = 'English'
        WHERE language IS NULL
        """
    )


def downgrade() -> None:
    """Remove language column from llm_configs table.

    Dropping is skipped when the column is already absent, mirroring the
    existence guard used by ``upgrade``.
    """
    if _has_column("llm_configs", "language"):
        op.drop_column("llm_configs", "language")
a/surfsense_backend/app/agents/researcher/prompts.py b/surfsense_backend/app/agents/researcher/prompts.py index 868a78851..825772a24 100644 --- a/surfsense_backend/app/agents/researcher/prompts.py +++ b/surfsense_backend/app/agents/researcher/prompts.py @@ -1,9 +1,12 @@ import datetime + def _build_language_instruction(language: str | None = None): if language: return f"\n\nIMPORTANT: Please respond in {language} language. All your responses, explanations, and analysis should be written in {language}." return "" + + def get_answer_outline_system_prompt(language: str | None = None) -> str: language_instruction = _build_language_instruction(language) diff --git a/surfsense_backend/app/agents/researcher/qna_agent/nodes.py b/surfsense_backend/app/agents/researcher/qna_agent/nodes.py index 20374b706..c4e79d685 100644 --- a/surfsense_backend/app/agents/researcher/qna_agent/nodes.py +++ b/surfsense_backend/app/agents/researcher/qna_agent/nodes.py @@ -102,7 +102,7 @@ async def answer_question(state: State, config: RunnableConfig) -> dict[str, Any user_query = configuration.user_query user_id = configuration.user_id search_space_id = configuration.search_space_id - language = configuration.language + language = configuration.language # Get user's fast LLM llm = await get_user_fast_llm(state.db_session, user_id, search_space_id) if not llm: @@ -127,7 +127,9 @@ async def answer_question(state: State, config: RunnableConfig) -> dict[str, Any """ # Use initial system prompt for token calculation - initial_system_prompt = get_qna_citation_system_prompt(chat_history_str, language) + initial_system_prompt = get_qna_citation_system_prompt( + chat_history_str, language + ) base_messages = [ SystemMessage(content=initial_system_prompt), HumanMessage(content=base_human_message_template), diff --git a/surfsense_backend/app/agents/researcher/qna_agent/prompts.py b/surfsense_backend/app/agents/researcher/qna_agent/prompts.py index de17ec933..9c35f90cc 100644 --- 
a/surfsense_backend/app/agents/researcher/qna_agent/prompts.py +++ b/surfsense_backend/app/agents/researcher/qna_agent/prompts.py @@ -1,7 +1,11 @@ import datetime + from ..prompts import _build_language_instruction -def get_qna_citation_system_prompt(chat_history: str | None = None, language: str | None = None): + +def get_qna_citation_system_prompt( + chat_history: str | None = None, language: str | None = None +): chat_history_section = ( f""" @@ -15,7 +19,7 @@ NO CHAT HISTORY PROVIDED """ ) - + # Add language instruction if specified language_instruction = _build_language_instruction(language) return f""" @@ -151,7 +155,9 @@ Make sure your response: """ -def get_qna_no_documents_system_prompt(chat_history: str | None = None, language: str | None = None): +def get_qna_no_documents_system_prompt( + chat_history: str | None = None, language: str | None = None +): chat_history_section = ( f""" @@ -165,7 +171,7 @@ NO CHAT HISTORY PROVIDED """ ) - + # Add language instruction if specified language_instruction = _build_language_instruction(language) diff --git a/surfsense_backend/app/agents/researcher/sub_section_writer/prompts.py b/surfsense_backend/app/agents/researcher/sub_section_writer/prompts.py index a6a561bf1..3c34eb474 100644 --- a/surfsense_backend/app/agents/researcher/sub_section_writer/prompts.py +++ b/surfsense_backend/app/agents/researcher/sub_section_writer/prompts.py @@ -1,6 +1,11 @@ import datetime + from ..prompts import _build_language_instruction -def get_citation_system_prompt(chat_history: str | None = None, language: str | None = None): + + +def get_citation_system_prompt( + chat_history: str | None = None, language: str | None = None +): chat_history_section = ( f""" @@ -14,7 +19,7 @@ NO CHAT HISTORY PROVIDED """ ) - + # Add language instruction if specified language_instruction = _build_language_instruction(language) @@ -158,7 +163,9 @@ Make sure your response: """ -def get_no_documents_system_prompt(chat_history: str | None = None, language: 
str | None = None): +def get_no_documents_system_prompt( + chat_history: str | None = None, language: str | None = None +): chat_history_section = ( f""" @@ -172,7 +179,7 @@ NO CHAT HISTORY PROVIDED """ ) - + # Add language instruction if specified language_instruction = _build_language_instruction(language) diff --git a/surfsense_backend/app/routes/chats_routes.py b/surfsense_backend/app/routes/chats_routes.py index 0874d2611..e003dc260 100644 --- a/surfsense_backend/app/routes/chats_routes.py +++ b/surfsense_backend/app/routes/chats_routes.py @@ -6,7 +6,6 @@ from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.future import select from sqlalchemy.orm import selectinload - from app.db import Chat, SearchSpace, User, UserSearchSpacePreference, get_async_session from app.schemas import ( AISDKChatRequest, @@ -64,47 +63,53 @@ async def handle_chat_data( language_result = await session.execute( select(UserSearchSpacePreference) .options( - selectinload(UserSearchSpacePreference.search_space).selectinload(SearchSpace.llm_configs), + selectinload(UserSearchSpacePreference.search_space).selectinload( + SearchSpace.llm_configs + ), selectinload(UserSearchSpacePreference.long_context_llm), selectinload(UserSearchSpacePreference.fast_llm), - selectinload(UserSearchSpacePreference.strategic_llm) + selectinload(UserSearchSpacePreference.strategic_llm), ) .filter( - UserSearchSpacePreference.search_space_id == search_space_id, - UserSearchSpacePreference.user_id == user.id + UserSearchSpacePreference.search_space_id == search_space_id, + UserSearchSpacePreference.user_id == user.id, ) ) user_preference = language_result.scalars().first() # print("UserSearchSpacePreference:", user_preference) - + language = None - if user_preference and user_preference.search_space and user_preference.search_space.llm_configs: + if ( + user_preference + and user_preference.search_space + and user_preference.search_space.llm_configs + ): llm_configs = 
user_preference.search_space.llm_configs - - - for preferred_llm in [user_preference.fast_llm, user_preference.long_context_llm, user_preference.strategic_llm]: - if preferred_llm and getattr(preferred_llm, 'language', None): + + for preferred_llm in [ + user_preference.fast_llm, + user_preference.long_context_llm, + user_preference.strategic_llm, + ]: + if preferred_llm and getattr(preferred_llm, "language", None): language = preferred_llm.language break - - + if not language: first_llm_config = llm_configs[0] - language = getattr(first_llm_config, 'language', None) - - + language = getattr(first_llm_config, "language", None) + except HTTPException: raise HTTPException( status_code=403, detail="You don't have access to this search space" ) from None - + langchain_chat_history = [] for message in messages[:-1]: if message["role"] == "user": langchain_chat_history.append(HumanMessage(content=message["content"])) elif message["role"] == "assistant": langchain_chat_history.append(AIMessage(content=message["content"])) - response = StreamingResponse( stream_connector_search_results( @@ -117,7 +122,7 @@ async def handle_chat_data( langchain_chat_history, search_mode_str, document_ids_to_add_in_context, - language, + language, ) ) diff --git a/surfsense_backend/app/routes/llm_config_routes.py b/surfsense_backend/app/routes/llm_config_routes.py index 896f7be41..ec8ea5846 100644 --- a/surfsense_backend/app/routes/llm_config_routes.py +++ b/surfsense_backend/app/routes/llm_config_routes.py @@ -299,10 +299,10 @@ async def update_user_llm_preferences( # Validate that all provided LLM config IDs belong to the search space update_data = preferences.model_dump(exclude_unset=True) - + # Store language from configs to validate consistency languages = set() - + for _key, llm_config_id in update_data.items(): if llm_config_id is not None: # Verify the LLM config belongs to the search space @@ -318,10 +318,10 @@ async def update_user_llm_preferences( status_code=404, detail=f"LLM 
configuration {llm_config_id} not found in this search space", ) - + # Collect language for consistency check languages.add(llm_config.language) - + # Check if all selected LLM configs have the same language if len(languages) > 1: raise HTTPException(