diff --git a/api/Dockerfile b/api/Dockerfile index d8afae0..91078f9 100644 --- a/api/Dockerfile +++ b/api/Dockerfile @@ -25,7 +25,7 @@ RUN pip install --user --no-cache-dir -r requirements.txt && \ # Copy and install pipecat from local submodule COPY pipecat /tmp/pipecat -RUN pip install --user --no-cache-dir '/tmp/pipecat[cartesia,deepgram,openai,elevenlabs,groq,google,azure,sarvam,soundfile,silero,webrtc,local-smart-turn-v3,speechmatics]' && \ +RUN pip install --user --no-cache-dir '/tmp/pipecat[cartesia,deepgram,openai,elevenlabs,groq,google,azure,sarvam,soundfile,silero,webrtc,local-smart-turn-v3,speechmatics,openrouter]' && \ # Clean up pip cache and temporary pipecat directory rm -rf /root/.cache/pip /tmp/pipecat diff --git a/api/routes/knowledge_base.py b/api/routes/knowledge_base.py index fbe4381..b9fbaae 100644 --- a/api/routes/knowledge_base.py +++ b/api/routes/knowledge_base.py @@ -360,6 +360,9 @@ async def search_chunks( db_client=db_client, api_key=embeddings_api_key, model_id=embeddings_model or "text-embedding-3-small", + base_url=getattr(user_config.embeddings, "base_url", None) + if user_config.embeddings + else None, ) # Perform search diff --git a/api/services/configuration/check_validity.py b/api/services/configuration/check_validity.py index 351aec3..3f25ef2 100644 --- a/api/services/configuration/check_validity.py +++ b/api/services/configuration/check_validity.py @@ -33,6 +33,7 @@ class UserConfigurationValidator: ServiceProviders.OPENAI.value: self._check_openai_api_key, ServiceProviders.DEEPGRAM.value: self._check_deepgram_api_key, ServiceProviders.GROQ.value: self._check_groq_api_key, + ServiceProviders.OPENROUTER.value: self._check_openrouter_api_key, ServiceProviders.ELEVENLABS.value: self._validate_elevenlabs_api_key, ServiceProviders.GOOGLE.value: self._check_google_api_key, ServiceProviders.AZURE.value: self._check_azure_api_key, @@ -150,5 +151,8 @@ class UserConfigurationValidator: def _check_sarvam_api_key(self, model: str, api_key: 
str) -> bool: return True + def _check_openrouter_api_key(self, model: str, api_key: str) -> bool: + return True + def _check_speechmatics_api_key(self, model: str, api_key: str) -> bool: return True diff --git a/api/services/configuration/registry.py b/api/services/configuration/registry.py index f1d0b69..4443b5a 100644 --- a/api/services/configuration/registry.py +++ b/api/services/configuration/registry.py @@ -15,6 +15,7 @@ class ServiceProviders(str, Enum): OPENAI = "openai" DEEPGRAM = "deepgram" GROQ = "groq" + OPENROUTER = "openrouter" CARTESIA = "cartesia" # NEUPHONIC = "neuphonic" ELEVENLABS = "elevenlabs" @@ -30,6 +31,7 @@ class BaseServiceConfiguration(BaseModel): ServiceProviders.OPENAI, ServiceProviders.DEEPGRAM, ServiceProviders.GROQ, + ServiceProviders.OPENROUTER, ServiceProviders.ELEVENLABS, ServiceProviders.GOOGLE, ServiceProviders.AZURE, @@ -131,6 +133,15 @@ GROQ_MODELS = [ "llama-3.1-8b-instant", "openai/gpt-oss-120b", ] +OPENROUTER_MODELS = [ + "openai/gpt-4.1", + "openai/gpt-4.1-mini", + "anthropic/claude-sonnet-4", + "google/gemini-2.5-flash", + "google/gemini-2.0-flash", + "meta-llama/llama-3.3-70b-instruct", + "deepseek/deepseek-chat-v3-0324", +] AZURE_MODELS = ["gpt-4.1-mini"] DOGRAH_LLM_MODELS = ["default", "accurate", "fast", "lite", "zen"] @@ -160,6 +171,16 @@ class GroqLLMService(BaseLLMConfiguration): api_key: str +@register_llm +class OpenRouterLLMConfiguration(BaseLLMConfiguration): + provider: Literal[ServiceProviders.OPENROUTER] = ServiceProviders.OPENROUTER + model: str = Field( + default="openai/gpt-4.1", json_schema_extra={"examples": OPENROUTER_MODELS} + ) + api_key: str + base_url: str = Field(default="https://openrouter.ai/api/v1") + + @register_llm class AzureLLMService(BaseLLMConfiguration): provider: Literal[ServiceProviders.AZURE] = ServiceProviders.AZURE @@ -183,6 +204,7 @@ LLMConfig = Annotated[ Union[ OpenAILLMService, GroqLLMService, + OpenRouterLLMConfiguration, GoogleLLMService, AzureLLMService, DograhLLMService, @@ 
-497,8 +519,22 @@ class OpenAIEmbeddingsConfiguration(BaseEmbeddingsConfiguration): api_key: str +OPENROUTER_EMBEDDING_MODELS = ["openai/text-embedding-3-small"] + + +@register_embeddings +class OpenRouterEmbeddingsConfiguration(BaseEmbeddingsConfiguration): + provider: Literal[ServiceProviders.OPENROUTER] = ServiceProviders.OPENROUTER + model: str = Field( + default="openai/text-embedding-3-small", + json_schema_extra={"examples": OPENROUTER_EMBEDDING_MODELS}, + ) + api_key: str + base_url: str = Field(default="https://openrouter.ai/api/v1") + + EmbeddingsConfig = Annotated[ - Union[OpenAIEmbeddingsConfiguration], + Union[OpenAIEmbeddingsConfiguration, OpenRouterEmbeddingsConfiguration], Field(discriminator="provider"), ] diff --git a/api/services/gen_ai/embedding/openai_service.py b/api/services/gen_ai/embedding/openai_service.py index ec4598b..89a58a3 100644 --- a/api/services/gen_ai/embedding/openai_service.py +++ b/api/services/gen_ai/embedding/openai_service.py @@ -50,6 +50,7 @@ class OpenAIEmbeddingService(BaseEmbeddingService): api_key: Optional[str] = None, model_id: str = DEFAULT_MODEL_ID, max_tokens: int = 512, + base_url: Optional[str] = None, ): """Initialize the OpenAI embedding service. @@ -59,6 +60,7 @@ class OpenAIEmbeddingService(BaseEmbeddingService): initialized and operations will fail with a clear error. model_id: OpenAI embedding model ID (default: text-embedding-3-small) max_tokens: Maximum number of tokens per chunk (default: 512) + base_url: Optional base URL for the API (e.g. 
for OpenRouter) """ self.db = db_client self.model_id = model_id @@ -67,7 +69,10 @@ class OpenAIEmbeddingService(BaseEmbeddingService): # Only initialize OpenAI client if API key is provided self._api_key_configured = bool(api_key) if self._api_key_configured: - self.client = AsyncOpenAI(api_key=api_key) + client_kwargs = {"api_key": api_key} + if base_url: + client_kwargs["base_url"] = base_url + self.client = AsyncOpenAI(**client_kwargs) logger.info(f"OpenAI embedding service initialized with model: {model_id}") else: self.client = None diff --git a/api/services/pipecat/run_pipeline.py b/api/services/pipecat/run_pipeline.py index 4d4ea9d..1f6356a 100644 --- a/api/services/pipecat/run_pipeline.py +++ b/api/services/pipecat/run_pipeline.py @@ -538,9 +538,11 @@ async def _run_pipeline( # Extract embeddings configuration from user config embeddings_api_key = None embeddings_model = None + embeddings_base_url = None if user_config and user_config.embeddings: embeddings_api_key = user_config.embeddings.api_key embeddings_model = user_config.embeddings.model + embeddings_base_url = getattr(user_config.embeddings, "base_url", None) engine = PipecatEngine( llm=llm, @@ -550,6 +552,7 @@ async def _run_pipeline( node_transition_callback=node_transition_callback, embeddings_api_key=embeddings_api_key, embeddings_model=embeddings_model, + embeddings_base_url=embeddings_base_url, ) # Create pipeline components with audio configuration diff --git a/api/services/pipecat/service_factory.py b/api/services/pipecat/service_factory.py index 572289f..129aa00 100644 --- a/api/services/pipecat/service_factory.py +++ b/api/services/pipecat/service_factory.py @@ -19,6 +19,7 @@ from pipecat.services.groq.llm import GroqLLMService from pipecat.services.openai.llm import OpenAILLMService from pipecat.services.openai.stt import OpenAISTTService from pipecat.services.openai.tts import OpenAITTSService +from pipecat.services.openrouter.llm import OpenRouterLLMService from 
pipecat.services.sarvam.stt import SarvamSTTService from pipecat.services.sarvam.tts import SarvamTTSService from pipecat.services.speechmatics.stt import SpeechmaticsSTTService @@ -253,6 +254,13 @@ def create_llm_service(user_config): model=model, params=OpenAILLMService.InputParams(temperature=0.1), ) + elif user_config.llm.provider == ServiceProviders.OPENROUTER.value: + return OpenRouterLLMService( + api_key=user_config.llm.api_key, + model=model, + base_url=user_config.llm.base_url, + params=OpenAILLMService.InputParams(temperature=0.1), + ) elif user_config.llm.provider == ServiceProviders.GOOGLE.value: # Use the correct InputParams class for Google to avoid propagating OpenAI-specific # NOT_GIVEN sentinels that break Pydantic validation in GoogleLLMService. diff --git a/api/services/workflow/pipecat_engine.py b/api/services/workflow/pipecat_engine.py index b960bdf..8e6c74c 100644 --- a/api/services/workflow/pipecat_engine.py +++ b/api/services/workflow/pipecat_engine.py @@ -70,6 +70,7 @@ class PipecatEngine: ] = None, embeddings_api_key: Optional[str] = None, embeddings_model: Optional[str] = None, + embeddings_base_url: Optional[str] = None, ): self.task = task self.llm = llm @@ -110,6 +111,7 @@ class PipecatEngine: # Embeddings configuration (passed from run_pipeline.py) self._embeddings_api_key: Optional[str] = embeddings_api_key self._embeddings_model: Optional[str] = embeddings_model + self._embeddings_base_url: Optional[str] = embeddings_base_url async def _get_organization_id(self) -> Optional[int]: """Get and cache the organization ID from workflow run.""" @@ -352,6 +354,7 @@ class PipecatEngine: limit=3, # Return top 3 most relevant chunks embeddings_api_key=self._embeddings_api_key, embeddings_model=self._embeddings_model, + embeddings_base_url=self._embeddings_base_url, ) await function_call_params.result_callback(result) diff --git a/api/services/workflow/tools/knowledge_base.py b/api/services/workflow/tools/knowledge_base.py index 
9732619..ca79efc 100644 --- a/api/services/workflow/tools/knowledge_base.py +++ b/api/services/workflow/tools/knowledge_base.py @@ -28,6 +28,7 @@ async def retrieve_from_knowledge_base( limit: int = 3, embeddings_api_key: Optional[str] = None, embeddings_model: Optional[str] = None, + embeddings_base_url: Optional[str] = None, ) -> Dict[str, Any]: """Retrieve relevant information from the knowledge base using vector similarity search. @@ -43,6 +44,7 @@ async def retrieve_from_knowledge_base( limit: Maximum number of chunks to return (default: 3) embeddings_api_key: Optional API key for embedding service embeddings_model: Optional model ID for embedding service + embeddings_base_url: Optional base URL for embedding service Returns: Dictionary containing: @@ -70,6 +72,7 @@ async def retrieve_from_knowledge_base( limit, embeddings_api_key, embeddings_model, + embeddings_base_url, ) # Create span with parent context @@ -106,6 +109,7 @@ async def retrieve_from_knowledge_base( limit, embeddings_api_key, embeddings_model, + embeddings_base_url, ) # Add result metadata to span @@ -179,6 +183,7 @@ async def retrieve_from_knowledge_base( limit, embeddings_api_key, embeddings_model, + embeddings_base_url, ) else: # Tracing is disabled - perform retrieval without tracing @@ -199,6 +204,7 @@ async def _perform_retrieval( limit: int, embeddings_api_key: Optional[str] = None, embeddings_model: Optional[str] = None, + embeddings_base_url: Optional[str] = None, ) -> Dict[str, Any]: """Internal function to perform the actual retrieval operation. 
@@ -213,6 +219,7 @@ async def _perform_retrieval( max_tokens=128, # This is only used for chunking, not for retrieval api_key=embeddings_api_key, model_id=embeddings_model or "text-embedding-3-small", + base_url=embeddings_base_url, ) # Perform vector similarity search diff --git a/api/tasks/knowledge_base_processing.py b/api/tasks/knowledge_base_processing.py index e1a4cea..fe0d996 100644 --- a/api/tasks/knowledge_base_processing.py +++ b/api/tasks/knowledge_base_processing.py @@ -124,11 +124,13 @@ async def process_knowledge_base_document( # Try to get user's embeddings configuration embeddings_api_key = None embeddings_model = None + embeddings_base_url = None if document.created_by: user_config = await db_client.get_user_configurations(document.created_by) if user_config.embeddings: embeddings_api_key = user_config.embeddings.api_key embeddings_model = user_config.embeddings.model + embeddings_base_url = getattr(user_config.embeddings, "base_url", None) logger.info(f"Using user embeddings config: model={embeddings_model}") # Check if API key is configured @@ -148,6 +150,7 @@ async def process_knowledge_base_document( max_tokens=max_tokens, api_key=embeddings_api_key, model_id=embeddings_model or "text-embedding-3-small", + base_url=embeddings_base_url, ) # Step 1: Convert document with docling diff --git a/docs/deployment/docker.mdx b/docs/deployment/docker.mdx index abc6c6e..6408e18 100644 --- a/docs/deployment/docker.mdx +++ b/docs/deployment/docker.mdx @@ -70,6 +70,10 @@ It will automatically: ### Start the Application + +Please ensure that Docker Compose is installed on your machine before proceeding further. You can check whether it's installed by running the `docker compose version` command. If it's not installed, please install it by following your server provider's documentation. 
+ + After the setup script completes, start Dograh: ```bash diff --git a/scripts/setup_pipecat.sh b/scripts/setup_pipecat.sh index 3db9836..70b3663 100755 --- a/scripts/setup_pipecat.sh +++ b/scripts/setup_pipecat.sh @@ -16,7 +16,7 @@ git submodule update --init --recursive # Install pipecat in editable mode with all extras echo "Installing pipecat dependencies..." -pip install -e ./pipecat[cartesia,deepgram,openai,elevenlabs,groq,google,azure,sarvam,soundfile,silero,webrtc,local-smart-turn-v3,speechmatics] +pip install -e ./pipecat[cartesia,deepgram,openai,elevenlabs,groq,google,azure,sarvam,soundfile,silero,webrtc,local-smart-turn-v3,speechmatics,openrouter] # Install other requirements echo "Installing dograh API requirements..."