Merge remote-tracking branch 'upstream/main' into fix/ui-log-message-overlap

Merge remote-tracking branch 'upstream/main' into fix/ui-log-message-overlap

- Bring upstream fixes and dependency updates
- Resolve UI overlap for long log messages in dashboard table
- Keep current feature branch changes (fix/ui-log-message-overlap)
This commit is contained in:
Anish Sarkar 2025-10-13 21:21:04 +05:30
commit f3e1cf640d
20 changed files with 318 additions and 19 deletions

View file

@ -2,7 +2,6 @@
Revision ID: '23'
Revises: '22'
Create Date: 2025-01-10 12:00:00.000000
"""

View file

@ -2,7 +2,6 @@
Revision ID: 24
Revises: 23
Create Date: 2025-01-10 14:00:00.000000
"""

View file

@ -2,7 +2,6 @@
Revision ID: 25
Revises: 24
Create Date: 2025-01-10 14:00:00.000000
Changes:
1. Migrate llm_configs from user association to search_space association

View file

@ -0,0 +1,69 @@
"""Add language column to llm_configs
Revision ID: 26
Revises: 25
Changes:
1. Add language column to llm_configs table with default value of 'English'
"""
from collections.abc import Sequence
import sqlalchemy as sa
from alembic import op
# revision identifiers, used by Alembic.
revision: str = "26"
down_revision: str | None = "25"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
def upgrade() -> None:
"""Add language column to llm_configs table."""
from sqlalchemy import inspect
conn = op.get_bind()
inspector = inspect(conn)
# Get existing columns
llm_config_columns = [col["name"] for col in inspector.get_columns("llm_configs")]
# Add language column if it doesn't exist
if "language" not in llm_config_columns:
op.add_column(
"llm_configs",
sa.Column(
"language",
sa.String(length=50),
nullable=True,
server_default="English",
),
)
# Update existing rows to have 'English' as default
op.execute(
"""
UPDATE llm_configs
SET language = 'English'
WHERE language IS NULL
"""
)
def downgrade() -> None:
"""Remove language column from llm_configs table."""
from sqlalchemy import inspect
conn = op.get_bind()
inspector = inspect(conn)
# Get existing columns
llm_config_columns = [col["name"] for col in inspector.get_columns("llm_configs")]
# Drop language column if it exists
if "language" in llm_config_columns:
op.drop_column("llm_configs", "language")

View file

@ -37,6 +37,7 @@ class Configuration:
search_mode: SearchMode
research_mode: ResearchMode
document_ids_to_add_in_context: list[int]
language: str | None = None
@classmethod
def from_runnable_config(

View file

@ -578,6 +578,7 @@ async def write_answer_outline(
num_sections = configuration.num_sections
user_id = configuration.user_id
search_space_id = configuration.search_space_id
language = configuration.language # Get language from configuration
writer(
{
@ -628,7 +629,7 @@ async def write_answer_outline(
# Create messages for the LLM
messages = [
SystemMessage(content=get_answer_outline_system_prompt()),
SystemMessage(content=get_answer_outline_system_prompt(language=language)),
HumanMessage(content=human_message_content),
]
@ -2000,6 +2001,7 @@ async def handle_qna_workflow(
"relevant_documents": all_documents, # Use combined documents
"user_id": configuration.user_id,
"search_space_id": configuration.search_space_id,
"language": configuration.language,
}
}

View file

@ -1,9 +1,18 @@
import datetime
def get_answer_outline_system_prompt():
def _build_language_instruction(language: str | None = None):
if language:
return f"\n\nIMPORTANT: Please respond in {language} language. All your responses, explanations, and analysis should be written in {language}."
return ""
def get_answer_outline_system_prompt(language: str | None = None) -> str:
language_instruction = _build_language_instruction(language)
return f"""
Today's date: {datetime.datetime.now().strftime("%Y-%m-%d")}
{language_instruction}
<answer_outline_system>
You are an expert research assistant specializing in structuring information. Your task is to create a detailed and logical research outline based on the user's query. This outline will serve as the blueprint for generating a comprehensive research report.

View file

@ -20,6 +20,7 @@ class Configuration:
] # Documents provided directly to the agent for answering
user_id: str # User identifier
search_space_id: int # Search space identifier
language: str | None = None # Language for responses
@classmethod
def from_runnable_config(

View file

@ -102,7 +102,7 @@ async def answer_question(state: State, config: RunnableConfig) -> dict[str, Any
user_query = configuration.user_query
user_id = configuration.user_id
search_space_id = configuration.search_space_id
language = configuration.language
# Get user's fast LLM
llm = await get_user_fast_llm(state.db_session, user_id, search_space_id)
if not llm:
@ -127,7 +127,9 @@ async def answer_question(state: State, config: RunnableConfig) -> dict[str, Any
"""
# Use initial system prompt for token calculation
initial_system_prompt = get_qna_citation_system_prompt(chat_history_str)
initial_system_prompt = get_qna_citation_system_prompt(
chat_history_str, language
)
base_messages = [
SystemMessage(content=initial_system_prompt),
HumanMessage(content=base_human_message_template),
@ -146,9 +148,9 @@ async def answer_question(state: State, config: RunnableConfig) -> dict[str, Any
# Choose system prompt based on final document availability
system_prompt = (
get_qna_citation_system_prompt(chat_history_str)
get_qna_citation_system_prompt(chat_history_str, language)
if has_documents
else get_qna_no_documents_system_prompt(chat_history_str)
else get_qna_no_documents_system_prompt(chat_history_str, language)
)
# Generate documents section

View file

@ -1,7 +1,11 @@
import datetime
from ..prompts import _build_language_instruction
def get_qna_citation_system_prompt(chat_history: str | None = None):
def get_qna_citation_system_prompt(
chat_history: str | None = None, language: str | None = None
):
chat_history_section = (
f"""
<chat_history>
@ -16,9 +20,11 @@ NO CHAT HISTORY PROVIDED
"""
)
# Add language instruction if specified
language_instruction = _build_language_instruction(language)
return f"""
Today's date: {datetime.datetime.now().strftime("%Y-%m-%d")}
You are SurfSense, an advanced AI research assistant that provides detailed, well-researched answers to user questions by synthesizing information from multiple personal knowledge sources.
You are SurfSense, an advanced AI research assistant that provides detailed, well-researched answers to user questions by synthesizing information from multiple personal knowledge sources.{language_instruction}
{chat_history_section}
<knowledge_sources>
- EXTENSION: "Web content saved via SurfSense browser extension" (personal browsing history)
@ -149,7 +155,9 @@ Make sure your response:
"""
def get_qna_no_documents_system_prompt(chat_history: str | None = None):
def get_qna_no_documents_system_prompt(
chat_history: str | None = None, language: str | None = None
):
chat_history_section = (
f"""
<chat_history>
@ -164,9 +172,12 @@ NO CHAT HISTORY PROVIDED
"""
)
# Add language instruction if specified
language_instruction = _build_language_instruction(language)
return f"""
Today's date: {datetime.datetime.now().strftime("%Y-%m-%d")}
You are SurfSense, an advanced AI research assistant that provides helpful, detailed answers to user questions in a conversational manner.
You are SurfSense, an advanced AI research assistant that provides helpful, detailed answers to user questions in a conversational manner.{language_instruction}
{chat_history_section}
<context>
The user has asked a question but there are no specific documents from their personal knowledge base available to answer it. You should provide a helpful response based on:

View file

@ -1,7 +1,11 @@
import datetime
from ..prompts import _build_language_instruction
def get_citation_system_prompt(chat_history: str | None = None):
def get_citation_system_prompt(
chat_history: str | None = None, language: str | None = None
):
chat_history_section = (
f"""
<chat_history>
@ -16,9 +20,12 @@ NO CHAT HISTORY PROVIDED
"""
)
# Add language instruction if specified
language_instruction = _build_language_instruction(language)
return f"""
Today's date: {datetime.datetime.now().strftime("%Y-%m-%d")}
You are SurfSense, an advanced AI research assistant that synthesizes information from multiple knowledge sources to provide comprehensive, well-cited answers to user queries.
You are SurfSense, an advanced AI research assistant that synthesizes information from multiple knowledge sources to provide comprehensive, well-cited answers to user queries.{language_instruction}
{chat_history_section}
<knowledge_sources>
- EXTENSION: "Web content saved via SurfSense browser extension" (personal browsing history)
@ -156,7 +163,9 @@ Make sure your response:
"""
def get_no_documents_system_prompt(chat_history: str | None = None):
def get_no_documents_system_prompt(
chat_history: str | None = None, language: str | None = None
):
chat_history_section = (
f"""
<chat_history>
@ -171,9 +180,12 @@ NO CHAT HISTORY PROVIDED
"""
)
# Add language instruction if specified
language_instruction = _build_language_instruction(language)
return f"""
Today's date: {datetime.datetime.now().strftime("%Y-%m-%d")}
You are SurfSense, an advanced AI research assistant that helps users create well-structured content for their documents and research.
You are SurfSense, an advanced AI research assistant that helps users create well-structured content for their documents and research.{language_instruction}
{chat_history_section}
<context>
You are writing content for a specific sub-section of a document. No specific documents from the user's personal knowledge base are available, so you should create content based on:

View file

@ -296,6 +296,8 @@ class LLMConfig(BaseModel, TimestampMixin):
api_key = Column(String, nullable=False)
api_base = Column(String(500), nullable=True)
language = Column(String(50), nullable=True, default="English")
# For any other parameters that litellm supports
litellm_params = Column(JSON, nullable=True, default={})

View file

@ -4,8 +4,9 @@ from langchain.schema import AIMessage, HumanMessage
from sqlalchemy.exc import IntegrityError, OperationalError
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.future import select
from sqlalchemy.orm import selectinload
from app.db import Chat, SearchSpace, User, get_async_session
from app.db import Chat, SearchSpace, User, UserSearchSpacePreference, get_async_session
from app.schemas import (
AISDKChatRequest,
ChatCreate,
@ -53,10 +54,51 @@ async def handle_chat_data(
request_data.get("document_ids_to_add_in_context")
)
search_mode_str = validate_search_mode(request_data.get("search_mode"))
# print("RESQUEST DATA:", request_data)
# print("SELECTED CONNECTORS:", selected_connectors)
# Check if the search space belongs to the current user
try:
await check_ownership(session, SearchSpace, search_space_id, user)
language_result = await session.execute(
select(UserSearchSpacePreference)
.options(
selectinload(UserSearchSpacePreference.search_space).selectinload(
SearchSpace.llm_configs
),
selectinload(UserSearchSpacePreference.long_context_llm),
selectinload(UserSearchSpacePreference.fast_llm),
selectinload(UserSearchSpacePreference.strategic_llm),
)
.filter(
UserSearchSpacePreference.search_space_id == search_space_id,
UserSearchSpacePreference.user_id == user.id,
)
)
user_preference = language_result.scalars().first()
# print("UserSearchSpacePreference:", user_preference)
language = None
if (
user_preference
and user_preference.search_space
and user_preference.search_space.llm_configs
):
llm_configs = user_preference.search_space.llm_configs
for preferred_llm in [
user_preference.fast_llm,
user_preference.long_context_llm,
user_preference.strategic_llm,
]:
if preferred_llm and getattr(preferred_llm, "language", None):
language = preferred_llm.language
break
if not language:
first_llm_config = llm_configs[0]
language = getattr(first_llm_config, "language", None)
except HTTPException:
raise HTTPException(
status_code=403, detail="You don't have access to this search space"
@ -80,6 +122,7 @@ async def handle_chat_data(
langchain_chat_history,
search_mode_str,
document_ids_to_add_in_context,
language,
)
)

View file

@ -300,6 +300,9 @@ async def update_user_llm_preferences(
# Validate that all provided LLM config IDs belong to the search space
update_data = preferences.model_dump(exclude_unset=True)
# Store language from configs to validate consistency
languages = set()
for _key, llm_config_id in update_data.items():
if llm_config_id is not None:
# Verify the LLM config belongs to the search space
@ -316,6 +319,16 @@ async def update_user_llm_preferences(
detail=f"LLM configuration {llm_config_id} not found in this search space",
)
# Collect language for consistency check
languages.add(llm_config.language)
# Check if all selected LLM configs have the same language
if len(languages) > 1:
raise HTTPException(
status_code=400,
detail="All selected LLM configurations must have the same language setting",
)
# Update user preferences
for key, value in update_data.items():
setattr(preference, key, value)

View file

@ -26,6 +26,9 @@ class LLMConfigBase(BaseModel):
litellm_params: dict[str, Any] | None = Field(
default=None, description="Additional LiteLLM parameters"
)
language: str | None = Field(
default="English", max_length=50, description="Language for the LLM"
)
class LLMConfigCreate(LLMConfigBase):
@ -49,6 +52,9 @@ class LLMConfigUpdate(BaseModel):
api_base: str | None = Field(
None, max_length=500, description="Optional API base URL"
)
language: str | None = Field(
None, max_length=50, description="Language for the LLM"
)
litellm_params: dict[str, Any] | None = Field(
None, description="Additional LiteLLM parameters"
)

View file

@ -20,6 +20,7 @@ async def stream_connector_search_results(
langchain_chat_history: list[Any],
search_mode_str: str,
document_ids_to_add_in_context: list[int],
language: str | None = None,
) -> AsyncGenerator[str, None]:
"""
Stream connector search results to the client
@ -66,8 +67,10 @@ async def stream_connector_search_results(
"search_mode": search_mode,
"research_mode": research_mode,
"document_ids_to_add_in_context": document_ids_to_add_in_context,
"language": language, # Add language to the configuration
}
}
# print(f"Researcher configuration: {config['configurable']}") # Debug print
# Initialize state with database session and streaming service
initial_state = State(
db_session=session,

View file

@ -18,6 +18,7 @@ import {
SelectValue,
} from "@/components/ui/select";
import { LLM_PROVIDERS } from "@/contracts/enums/llm-providers";
import { LANGUAGES } from "@/contracts/enums/languages";
import { type CreateLLMConfig, useLLMConfigs } from "@/hooks/use-llm-configs";
import InferenceParamsEditor from "../inference-params-editor";
@ -42,6 +43,7 @@ export function AddProviderStep({
model_name: "",
api_key: "",
api_base: "",
language: "English",
litellm_params: {},
search_space_id: searchSpaceId,
});
@ -70,6 +72,7 @@ export function AddProviderStep({
model_name: "",
api_key: "",
api_base: "",
language: "English",
litellm_params: {},
search_space_id: searchSpaceId,
});
@ -119,6 +122,7 @@ export function AddProviderStep({
</div>
<p className="text-sm text-muted-foreground">
Model: {config.model_name}
{config.language && ` • Language: ${config.language}`}
{config.api_base && ` • Base: ${config.api_base}`}
</p>
</div>
@ -169,7 +173,7 @@ export function AddProviderStep({
</CardHeader>
<CardContent>
<form onSubmit={handleSubmit} className="space-y-4">
<div className="grid grid-cols-1 md:grid-cols-2 gap-4">
<div className="grid grid-cols-1 md:grid-cols-3 gap-4">
<div className="space-y-2">
<Label htmlFor="name">Configuration Name *</Label>
<Input
@ -199,6 +203,27 @@ export function AddProviderStep({
</SelectContent>
</Select>
</div>
{/* language */}
<div className="space-y-2">
<Label htmlFor="language">Language (Optional)</Label>
<Select
value={formData.language || "English"}
onValueChange={(value) => handleInputChange("language", value)}
>
<SelectTrigger>
<SelectValue placeholder="Select language" />
</SelectTrigger>
<SelectContent>
{LANGUAGES.map((language) => (
<SelectItem key={language.value} value={language.value}>
{language.label}
</SelectItem>
))}
</SelectContent>
</Select>
</div>
</div>
{formData.provider === "CUSTOM" && (

View file

@ -38,6 +38,7 @@ import {
SelectValue,
} from "@/components/ui/select";
import { LLM_PROVIDERS } from "@/contracts/enums/llm-providers";
import { LANGUAGES } from "@/contracts/enums/languages";
import { type CreateLLMConfig, type LLMConfig, useLLMConfigs } from "@/hooks/use-llm-configs";
import InferenceParamsEditor from "../inference-params-editor";
@ -65,6 +66,7 @@ export function ModelConfigManager({ searchSpaceId }: ModelConfigManagerProps) {
model_name: "",
api_key: "",
api_base: "",
language: "English",
litellm_params: {},
search_space_id: searchSpaceId,
});
@ -80,6 +82,7 @@ export function ModelConfigManager({ searchSpaceId }: ModelConfigManagerProps) {
model_name: editingConfig.model_name,
api_key: editingConfig.api_key,
api_base: editingConfig.api_base || "",
language: editingConfig.language || "English",
litellm_params: editingConfig.litellm_params || {},
search_space_id: searchSpaceId,
});
@ -118,6 +121,7 @@ export function ModelConfigManager({ searchSpaceId }: ModelConfigManagerProps) {
model_name: "",
api_key: "",
api_base: "",
language: "English",
litellm_params: {},
search_space_id: searchSpaceId,
});
@ -323,6 +327,13 @@ export function ModelConfigManager({ searchSpaceId }: ModelConfigManagerProps) {
<p className="text-sm text-muted-foreground font-mono">
{config.model_name}
</p>
{config.language && (
<div className="flex items-center gap-2">
<Badge variant="outline" className="text-xs">
{config.language}
</Badge>
</div>
)}
</div>
</div>
@ -432,6 +443,7 @@ export function ModelConfigManager({ searchSpaceId }: ModelConfigManagerProps) {
model_name: "",
api_key: "",
api_base: "",
language: "",
litellm_params: {},
search_space_id: searchSpaceId,
});
@ -524,6 +536,25 @@ export function ModelConfigManager({ searchSpaceId }: ModelConfigManagerProps) {
)}
</div>
<div className="space-y-2">
<Label htmlFor="language">Language (Optional)</Label>
<Select
value={formData.language || "English"}
onValueChange={(value) => handleInputChange("language", value)}
>
<SelectTrigger>
<SelectValue placeholder="Select language" />
</SelectTrigger>
<SelectContent>
{LANGUAGES.map((language) => (
<SelectItem key={language.value} value={language.value}>
{language.label}
</SelectItem>
))}
</SelectContent>
</Select>
</div>
<div className="space-y-2">
<Label htmlFor="api_key">API Key *</Label>
<Input
@ -579,6 +610,7 @@ export function ModelConfigManager({ searchSpaceId }: ModelConfigManagerProps) {
model_name: "",
api_key: "",
api_base: "",
language: "",
litellm_params: {},
search_space_id: searchSpaceId,
});

View file

@ -0,0 +1,69 @@
/**
 * A selectable response language.
 * `value` is what gets stored and sent to the backend; `label` is the
 * human-readable text shown in the select dropdown.
 */
export interface Language {
	value: string;
	label: string;
}
/**
 * Languages a user can pick for LLM responses.
 *
 * Values are human-readable English names (not ISO codes) because they are
 * interpolated verbatim into the LLM system prompt ("Please respond in
 * {language} language ..."); the backend stores them in a 50-char column.
 */
export const LANGUAGES: Language[] = [
	{ value: "English", label: "English" },
	{ value: "Spanish", label: "Spanish" },
	{ value: "French", label: "French" },
	{ value: "German", label: "German" },
	{ value: "Italian", label: "Italian" },
	{ value: "Portuguese", label: "Portuguese" },
	{ value: "Russian", label: "Russian" },
	{ value: "Chinese", label: "Chinese (Simplified)" },
	// NOTE(review): lowercase "traditional" is inconsistent with the Title Case
	// of every other value; normalizing it now would orphan already-stored
	// configs — confirm a data migration before changing.
	{ value: "Chinese-traditional", label: "Chinese (Traditional)" },
	{ value: "Japanese", label: "Japanese" },
	{ value: "Korean", label: "Korean" },
	{ value: "Arabic", label: "Arabic" },
	{ value: "Hindi", label: "Hindi" },
	{ value: "Dutch", label: "Dutch" },
	{ value: "Swedish", label: "Swedish" },
	{ value: "Norwegian", label: "Norwegian" },
	{ value: "Danish", label: "Danish" },
	{ value: "Finnish", label: "Finnish" },
	{ value: "Polish", label: "Polish" },
	{ value: "Czech", label: "Czech" },
	{ value: "Hungarian", label: "Hungarian" },
	{ value: "Romanian", label: "Romanian" },
	{ value: "Bulgarian", label: "Bulgarian" },
	{ value: "Croatian", label: "Croatian" },
	{ value: "Serbian", label: "Serbian" },
	{ value: "Slovenian", label: "Slovenian" },
	{ value: "Slovak", label: "Slovak" },
	{ value: "Lithuanian", label: "Lithuanian" },
	{ value: "Latvian", label: "Latvian" },
	{ value: "Estonian", label: "Estonian" },
	{ value: "Greek", label: "Greek" },
	{ value: "Turkish", label: "Turkish" },
	{ value: "Hebrew", label: "Hebrew" },
	{ value: "Thai", label: "Thai" },
	{ value: "Vietnamese", label: "Vietnamese" },
	{ value: "Indonesian", label: "Indonesian" },
	{ value: "Malay", label: "Malay" },
	{ value: "Tagalog", label: "Filipino/Tagalog" },
	{ value: "Bengali", label: "Bengali" },
	{ value: "Tamil", label: "Tamil" },
	{ value: "Telugu", label: "Telugu" },
	{ value: "Marathi", label: "Marathi" },
	{ value: "Gujarati", label: "Gujarati" },
	{ value: "Kannada", label: "Kannada" },
	{ value: "Malayalam", label: "Malayalam" },
	{ value: "Punjabi", label: "Punjabi" },
	{ value: "Urdu", label: "Urdu" },
	{ value: "Persian", label: "Persian/Farsi" },
	{ value: "Swahili", label: "Swahili" },
	{ value: "Afrikaans", label: "Afrikaans" },
	{ value: "Amharic", label: "Amharic" },
	{ value: "Ukrainian", label: "Ukrainian" },
	{ value: "Belarusian", label: "Belarusian" },
	{ value: "Georgian", label: "Georgian" },
	{ value: "Armenian", label: "Armenian" },
	{ value: "Azerbaijani", label: "Azerbaijani" },
	{ value: "Kazakh", label: "Kazakh" },
	{ value: "Uzbek", label: "Uzbek" },
	{ value: "Kyrgyz", label: "Kyrgyz" },
	{ value: "Tajik", label: "Tajik" },
	{ value: "Turkmen", label: "Turkmen" },
	{ value: "Mongolian", label: "Mongolian" },
];

View file

@ -10,6 +10,7 @@ export interface LLMConfig {
model_name: string;
api_key: string;
api_base?: string;
language?: string;
litellm_params?: Record<string, any>;
created_at: string;
search_space_id: number;
@ -31,6 +32,7 @@ export interface CreateLLMConfig {
model_name: string;
api_key: string;
api_base?: string;
language?: string;
litellm_params?: Record<string, any>;
search_space_id: number;
}