mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-06-24 21:38:09 +02:00
feat: add language support across configurations and prompts
This commit is contained in:
parent
402039f02f
commit
045537aa79
16 changed files with 242 additions and 18 deletions
|
|
@ -37,6 +37,8 @@ class Configuration:
|
|||
search_mode: SearchMode
|
||||
research_mode: ResearchMode
|
||||
document_ids_to_add_in_context: list[int]
|
||||
language: str | None = None
|
||||
|
||||
|
||||
@classmethod
|
||||
def from_runnable_config(
|
||||
|
|
|
|||
|
|
@ -578,6 +578,7 @@ async def write_answer_outline(
|
|||
num_sections = configuration.num_sections
|
||||
user_id = configuration.user_id
|
||||
search_space_id = configuration.search_space_id
|
||||
language = configuration.language # Get language from configuration
|
||||
|
||||
writer(
|
||||
{
|
||||
|
|
@ -628,7 +629,7 @@ async def write_answer_outline(
|
|||
|
||||
# Create messages for the LLM
|
||||
messages = [
|
||||
SystemMessage(content=get_answer_outline_system_prompt()),
|
||||
SystemMessage(content=get_answer_outline_system_prompt(language=language)),
|
||||
HumanMessage(content=human_message_content),
|
||||
]
|
||||
|
||||
|
|
@ -2000,6 +2001,7 @@ async def handle_qna_workflow(
|
|||
"relevant_documents": all_documents, # Use combined documents
|
||||
"user_id": configuration.user_id,
|
||||
"search_space_id": configuration.search_space_id,
|
||||
"language": configuration.language,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,9 +1,14 @@
|
|||
import datetime
|
||||
|
||||
|
||||
def get_answer_outline_system_prompt():
|
||||
def get_answer_outline_system_prompt(language: str | None = None) -> str:
|
||||
language_instruction = ""
|
||||
if language:
|
||||
language_instruction = f"\n\nIMPORTANT: Please respond in {language} language. All your responses, explanations, and analysis should be written in {language}."
|
||||
|
||||
return f"""
|
||||
Today's date: {datetime.datetime.now().strftime("%Y-%m-%d")}
|
||||
{language_instruction}
|
||||
<answer_outline_system>
|
||||
You are an expert research assistant specializing in structuring information. Your task is to create a detailed and logical research outline based on the user's query. This outline will serve as the blueprint for generating a comprehensive research report.
|
||||
|
||||
|
|
|
|||
|
|
@ -20,6 +20,7 @@ class Configuration:
|
|||
] # Documents provided directly to the agent for answering
|
||||
user_id: str # User identifier
|
||||
search_space_id: int # Search space identifier
|
||||
language: str | None = None # Language for responses
|
||||
|
||||
@classmethod
|
||||
def from_runnable_config(
|
||||
|
|
|
|||
|
|
@ -102,7 +102,7 @@ async def answer_question(state: State, config: RunnableConfig) -> dict[str, Any
|
|||
user_query = configuration.user_query
|
||||
user_id = configuration.user_id
|
||||
search_space_id = configuration.search_space_id
|
||||
|
||||
language = configuration.language
|
||||
# Get user's fast LLM
|
||||
llm = await get_user_fast_llm(state.db_session, user_id, search_space_id)
|
||||
if not llm:
|
||||
|
|
@ -127,7 +127,7 @@ async def answer_question(state: State, config: RunnableConfig) -> dict[str, Any
|
|||
"""
|
||||
|
||||
# Use initial system prompt for token calculation
|
||||
initial_system_prompt = get_qna_citation_system_prompt(chat_history_str)
|
||||
initial_system_prompt = get_qna_citation_system_prompt(chat_history_str, language)
|
||||
base_messages = [
|
||||
SystemMessage(content=initial_system_prompt),
|
||||
HumanMessage(content=base_human_message_template),
|
||||
|
|
@ -146,9 +146,9 @@ async def answer_question(state: State, config: RunnableConfig) -> dict[str, Any
|
|||
|
||||
# Choose system prompt based on final document availability
|
||||
system_prompt = (
|
||||
get_qna_citation_system_prompt(chat_history_str)
|
||||
get_qna_citation_system_prompt(chat_history_str, language)
|
||||
if has_documents
|
||||
else get_qna_no_documents_system_prompt(chat_history_str)
|
||||
else get_qna_no_documents_system_prompt(chat_history_str, language)
|
||||
)
|
||||
|
||||
# Generate documents section
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
import datetime
|
||||
|
||||
|
||||
def get_qna_citation_system_prompt(chat_history: str | None = None):
|
||||
def get_qna_citation_system_prompt(chat_history: str | None = None, language: str | None = None):
|
||||
chat_history_section = (
|
||||
f"""
|
||||
<chat_history>
|
||||
|
|
@ -15,10 +15,15 @@ NO CHAT HISTORY PROVIDED
|
|||
</chat_history>
|
||||
"""
|
||||
)
|
||||
|
||||
# Add language instruction if specified
|
||||
language_instruction = ""
|
||||
if language:
|
||||
language_instruction = f"\n\nIMPORTANT: Please respond in {language} language. All your responses, explanations, and analysis should be written in {language}."
|
||||
|
||||
return f"""
|
||||
Today's date: {datetime.datetime.now().strftime("%Y-%m-%d")}
|
||||
You are SurfSense, an advanced AI research assistant that provides detailed, well-researched answers to user questions by synthesizing information from multiple personal knowledge sources.
|
||||
You are SurfSense, an advanced AI research assistant that provides detailed, well-researched answers to user questions by synthesizing information from multiple personal knowledge sources.{language_instruction}
|
||||
{chat_history_section}
|
||||
<knowledge_sources>
|
||||
- EXTENSION: "Web content saved via SurfSense browser extension" (personal browsing history)
|
||||
|
|
@ -149,7 +154,7 @@ Make sure your response:
|
|||
"""
|
||||
|
||||
|
||||
def get_qna_no_documents_system_prompt(chat_history: str | None = None):
|
||||
def get_qna_no_documents_system_prompt(chat_history: str | None = None, language: str | None = None):
|
||||
chat_history_section = (
|
||||
f"""
|
||||
<chat_history>
|
||||
|
|
@ -163,10 +168,15 @@ NO CHAT HISTORY PROVIDED
|
|||
</chat_history>
|
||||
"""
|
||||
)
|
||||
|
||||
# Add language instruction if specified
|
||||
language_instruction = ""
|
||||
if language:
|
||||
language_instruction = f"\n\nIMPORTANT: Please respond in {language} language. All your responses, explanations, and analysis should be written in {language}."
|
||||
|
||||
return f"""
|
||||
Today's date: {datetime.datetime.now().strftime("%Y-%m-%d")}
|
||||
You are SurfSense, an advanced AI research assistant that provides helpful, detailed answers to user questions in a conversational manner.
|
||||
You are SurfSense, an advanced AI research assistant that provides helpful, detailed answers to user questions in a conversational manner.{language_instruction}
|
||||
{chat_history_section}
|
||||
<context>
|
||||
The user has asked a question but there are no specific documents from their personal knowledge base available to answer it. You should provide a helpful response based on:
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
import datetime
|
||||
|
||||
|
||||
def get_citation_system_prompt(chat_history: str | None = None):
|
||||
def get_citation_system_prompt(chat_history: str | None = None, language: str | None = None):
|
||||
chat_history_section = (
|
||||
f"""
|
||||
<chat_history>
|
||||
|
|
@ -15,10 +15,15 @@ NO CHAT HISTORY PROVIDED
|
|||
</chat_history>
|
||||
"""
|
||||
)
|
||||
|
||||
# Add language instruction if specified
|
||||
language_instruction = ""
|
||||
if language:
|
||||
language_instruction = f"\n\nIMPORTANT: Please respond in {language} language. All your responses, explanations, and analysis should be written in {language}."
|
||||
|
||||
return f"""
|
||||
Today's date: {datetime.datetime.now().strftime("%Y-%m-%d")}
|
||||
You are SurfSense, an advanced AI research assistant that synthesizes information from multiple knowledge sources to provide comprehensive, well-cited answers to user queries.
|
||||
You are SurfSense, an advanced AI research assistant that synthesizes information from multiple knowledge sources to provide comprehensive, well-cited answers to user queries.{language_instruction}
|
||||
{chat_history_section}
|
||||
<knowledge_sources>
|
||||
- EXTENSION: "Web content saved via SurfSense browser extension" (personal browsing history)
|
||||
|
|
@ -156,7 +161,7 @@ Make sure your response:
|
|||
"""
|
||||
|
||||
|
||||
def get_no_documents_system_prompt(chat_history: str | None = None):
|
||||
def get_no_documents_system_prompt(chat_history: str | None = None, language: str | None = None):
|
||||
chat_history_section = (
|
||||
f"""
|
||||
<chat_history>
|
||||
|
|
@ -170,10 +175,15 @@ NO CHAT HISTORY PROVIDED
|
|||
</chat_history>
|
||||
"""
|
||||
)
|
||||
|
||||
# Add language instruction if specified
|
||||
language_instruction = ""
|
||||
if language:
|
||||
language_instruction = f"\n\nIMPORTANT: Please respond in {language} language. All your responses, explanations, and analysis should be written in {language}."
|
||||
|
||||
return f"""
|
||||
Today's date: {datetime.datetime.now().strftime("%Y-%m-%d")}
|
||||
You are SurfSense, an advanced AI research assistant that helps users create well-structured content for their documents and research.
|
||||
You are SurfSense, an advanced AI research assistant that helps users create well-structured content for their documents and research.{language_instruction}
|
||||
{chat_history_section}
|
||||
<context>
|
||||
You are writing content for a specific sub-section of a document. No specific documents from the user's personal knowledge base are available, so you should create content based on:
|
||||
|
|
|
|||
|
|
@ -296,6 +296,8 @@ class LLMConfig(BaseModel, TimestampMixin):
|
|||
api_key = Column(String, nullable=False)
|
||||
api_base = Column(String(500), nullable=True)
|
||||
|
||||
language = Column(String(50), nullable=True, default="English")
|
||||
|
||||
# For any other parameters that litellm supports
|
||||
litellm_params = Column(JSON, nullable=True, default={})
|
||||
|
||||
|
|
|
|||
|
|
@ -4,8 +4,10 @@ from langchain.schema import AIMessage, HumanMessage
|
|||
from sqlalchemy.exc import IntegrityError, OperationalError
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy.future import select
|
||||
from sqlalchemy.orm import selectinload
|
||||
|
||||
from app.db import Chat, SearchSpace, User, get_async_session
|
||||
|
||||
from app.db import Chat, SearchSpace, User, UserSearchSpacePreference, get_async_session
|
||||
from app.schemas import (
|
||||
AISDKChatRequest,
|
||||
ChatCreate,
|
||||
|
|
@ -53,21 +55,60 @@ async def handle_chat_data(
|
|||
request_data.get("document_ids_to_add_in_context")
|
||||
)
|
||||
search_mode_str = validate_search_mode(request_data.get("search_mode"))
|
||||
# print("RESQUEST DATA:", request_data)
|
||||
# print("SELECTED CONNECTORS:", selected_connectors)
|
||||
|
||||
# Check if the search space belongs to the current user
|
||||
try:
|
||||
await check_ownership(session, SearchSpace, search_space_id, user)
|
||||
language_result = await session.execute(
|
||||
select(UserSearchSpacePreference)
|
||||
.options(
|
||||
selectinload(UserSearchSpacePreference.search_space).selectinload(SearchSpace.llm_configs),
|
||||
selectinload(UserSearchSpacePreference.long_context_llm),
|
||||
selectinload(UserSearchSpacePreference.fast_llm),
|
||||
selectinload(UserSearchSpacePreference.strategic_llm)
|
||||
)
|
||||
.filter(
|
||||
UserSearchSpacePreference.search_space_id == search_space_id,
|
||||
UserSearchSpacePreference.user_id == user.id
|
||||
)
|
||||
)
|
||||
user_preference = language_result.scalars().first()
|
||||
print("UserSearchSpacePreference:", user_preference)
|
||||
|
||||
language = None
|
||||
if user_preference and user_preference.search_space and user_preference.search_space.llm_configs:
|
||||
llm_configs = user_preference.search_space.llm_configs
|
||||
# print(f"Found {len(llm_configs)} LLM Configs")
|
||||
# for i, config in enumerate(llm_configs):
|
||||
# print(f" Config {i+1}: name={config.name}, provider={config.provider}, language={getattr(config, 'language', None)}")
|
||||
|
||||
|
||||
for preferred_llm in [user_preference.fast_llm, user_preference.long_context_llm, user_preference.strategic_llm]:
|
||||
if preferred_llm and getattr(preferred_llm, 'language', None):
|
||||
language = preferred_llm.language
|
||||
# print(f"Using language from preferred LLM: {preferred_llm.name} -> {language}")
|
||||
break
|
||||
|
||||
# If no preferred LLM has a language set, fall back to the first available LLM config
|
||||
if not language:
|
||||
first_llm_config = llm_configs[0]
|
||||
language = getattr(first_llm_config, 'language', None)
|
||||
# print(f"Using language from first LLM config: {first_llm_config.name} -> {language}")
|
||||
|
||||
except HTTPException:
|
||||
raise HTTPException(
|
||||
status_code=403, detail="You don't have access to this search space"
|
||||
) from None
|
||||
|
||||
# print("Language selected:", language)
|
||||
langchain_chat_history = []
|
||||
for message in messages[:-1]:
|
||||
if message["role"] == "user":
|
||||
langchain_chat_history.append(HumanMessage(content=message["content"]))
|
||||
elif message["role"] == "assistant":
|
||||
langchain_chat_history.append(AIMessage(content=message["content"]))
|
||||
|
||||
|
||||
response = StreamingResponse(
|
||||
stream_connector_search_results(
|
||||
|
|
@ -80,6 +121,7 @@ async def handle_chat_data(
|
|||
langchain_chat_history,
|
||||
search_mode_str,
|
||||
document_ids_to_add_in_context,
|
||||
language,
|
||||
)
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -299,7 +299,10 @@ async def update_user_llm_preferences(
|
|||
|
||||
# Validate that all provided LLM config IDs belong to the search space
|
||||
update_data = preferences.model_dump(exclude_unset=True)
|
||||
|
||||
|
||||
# Store language from configs to validate consistency
|
||||
languages = set()
|
||||
|
||||
for _key, llm_config_id in update_data.items():
|
||||
if llm_config_id is not None:
|
||||
# Verify the LLM config belongs to the search space
|
||||
|
|
@ -315,6 +318,16 @@ async def update_user_llm_preferences(
|
|||
status_code=404,
|
||||
detail=f"LLM configuration {llm_config_id} not found in this search space",
|
||||
)
|
||||
|
||||
# Collect language for consistency check
|
||||
languages.add(llm_config.language)
|
||||
|
||||
# Check if all selected LLM configs have the same language
|
||||
if len(languages) > 1:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail="All selected LLM configurations must have the same language setting",
|
||||
)
|
||||
|
||||
# Update user preferences
|
||||
for key, value in update_data.items():
|
||||
|
|
|
|||
|
|
@ -26,6 +26,9 @@ class LLMConfigBase(BaseModel):
|
|||
litellm_params: dict[str, Any] | None = Field(
|
||||
default=None, description="Additional LiteLLM parameters"
|
||||
)
|
||||
language: str | None = Field(
|
||||
default="English", max_length=50, description="Language for the LLM"
|
||||
)
|
||||
|
||||
|
||||
class LLMConfigCreate(LLMConfigBase):
|
||||
|
|
@ -49,6 +52,9 @@ class LLMConfigUpdate(BaseModel):
|
|||
api_base: str | None = Field(
|
||||
None, max_length=500, description="Optional API base URL"
|
||||
)
|
||||
language: str | None = Field(
|
||||
None, max_length=50, description="Language for the LLM"
|
||||
)
|
||||
litellm_params: dict[str, Any] | None = Field(
|
||||
None, description="Additional LiteLLM parameters"
|
||||
)
|
||||
|
|
|
|||
|
|
@ -20,6 +20,7 @@ async def stream_connector_search_results(
|
|||
langchain_chat_history: list[Any],
|
||||
search_mode_str: str,
|
||||
document_ids_to_add_in_context: list[int],
|
||||
language: str | None = None,
|
||||
) -> AsyncGenerator[str, None]:
|
||||
"""
|
||||
Stream connector search results to the client
|
||||
|
|
@ -66,8 +67,10 @@ async def stream_connector_search_results(
|
|||
"search_mode": search_mode,
|
||||
"research_mode": research_mode,
|
||||
"document_ids_to_add_in_context": document_ids_to_add_in_context,
|
||||
"language": language, # Add language to the configuration
|
||||
}
|
||||
}
|
||||
# print(f"Researcher configuration: {config['configurable']}") # Debug print
|
||||
# Initialize state with database session and streaming service
|
||||
initial_state = State(
|
||||
db_session=session,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue