mirror of
https://github.com/dograh-hq/dograh.git
synced 2026-06-25 08:48:13 +02:00
feat: add Azure AI multi-provider support (TTS, STT, Embeddings, Realtime)
Enables Azure AI services across all model layers so users with Azure credits can consolidate billing on a single provider.

- Voice (TTS): AzureSpeechTTSConfiguration via azure_speech provider
- Transcriber (STT): AzureSpeechSTTConfiguration via azure_speech provider
- Embedding: AzureOpenAIEmbeddingsConfiguration via azure provider
- Realtime: AzureRealtimeLLMConfiguration via azure_realtime provider

New files:
- api/services/pipecat/realtime/azure_realtime.py
- api/services/gen_ai/embedding/azure_openai_service.py
- api/tests/test_azure_speech_service_factory.py

The UI picks up all four providers automatically from the schema — no frontend changes required.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
e695436fb3
commit
dbbf362315
12 changed files with 883 additions and 28 deletions
|
|
@ -1,9 +1,12 @@
|
|||
"""Embedding services for document processing and retrieval."""
|
||||
|
||||
from .azure_openai_service import AzureEmbeddingAPIKeyNotConfiguredError, AzureOpenAIEmbeddingService
|
||||
from .base import BaseEmbeddingService
|
||||
from .openai_service import EmbeddingAPIKeyNotConfiguredError, OpenAIEmbeddingService
|
||||
|
||||
__all__ = [
|
||||
"AzureEmbeddingAPIKeyNotConfiguredError",
|
||||
"AzureOpenAIEmbeddingService",
|
||||
"BaseEmbeddingService",
|
||||
"EmbeddingAPIKeyNotConfiguredError",
|
||||
"OpenAIEmbeddingService",
|
||||
|
|
|
|||
119
api/services/gen_ai/embedding/azure_openai_service.py
Normal file
119
api/services/gen_ai/embedding/azure_openai_service.py
Normal file
|
|
@ -0,0 +1,119 @@
|
|||
"""Azure OpenAI embedding service.
|
||||
|
||||
Uses the Azure OpenAI REST API for text embeddings, compatible with
|
||||
text-embedding-3-small, text-embedding-3-large, and text-embedding-ada-002
|
||||
deployments.
|
||||
"""
|
||||
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from loguru import logger
|
||||
from openai import AsyncAzureOpenAI
|
||||
|
||||
from api.db.db_client import DBClient
|
||||
from api.utils.url_security import validate_user_configured_service_url
|
||||
|
||||
from .base import BaseEmbeddingService
|
||||
|
||||
# Deployment/model name used when the caller does not pass ``model_id``
# to AzureOpenAIEmbeddingService.
DEFAULT_MODEL_ID = "text-embedding-3-small"
|
||||
# Vector size reported by get_embedding_dimension(), regardless of which
# deployment is configured.
# NOTE(review): the module docstring lists text-embedding-3-large as
# compatible; that model presumably emits larger vectors by default —
# confirm this constant matches what the deployment actually returns.
EMBEDDING_DIMENSION = 1536
|
||||
|
||||
|
||||
class AzureEmbeddingAPIKeyNotConfiguredError(Exception):
    """Signal that Azure OpenAI embedding credentials are missing.

    The exception takes no arguments; it always carries the same fixed,
    user-facing message pointing at the Embedding model configuration.
    """

    def __init__(self):
        # Assemble the fixed message first so the super() call stays short.
        message = "".join(
            (
                "Azure OpenAI endpoint or API key not configured. Please set your ",
                "endpoint and API key in Model Configurations > Embedding to use ",
                "document processing.",
            )
        )
        super().__init__(message)
|
||||
|
||||
|
||||
class AzureOpenAIEmbeddingService(BaseEmbeddingService):
    """Embedding service using Azure OpenAI text-embedding deployments.

    The service can be constructed without credentials; in that case no
    client is created and every embedding/search operation raises
    ``AzureEmbeddingAPIKeyNotConfiguredError`` until a properly configured
    instance is built.
    """

    def __init__(
        self,
        db_client: DBClient,
        api_key: Optional[str] = None,
        endpoint: Optional[str] = None,
        model_id: str = DEFAULT_MODEL_ID,
        api_version: str = "2024-02-15-preview",
    ):
        """Initialize the Azure OpenAI embedding service.

        Args:
            db_client: Database client for vector similarity search.
            api_key: Azure OpenAI API key.
            endpoint: Azure OpenAI resource endpoint (e.g. https://<resource>.openai.azure.com).
            model_id: Deployment name, used as both the deployment and model identifier.
            api_version: Azure OpenAI API version.
        """
        self.db = db_client
        self.model_id = model_id

        # Both values are required; an empty string counts as "not set".
        self._configured = bool(api_key and endpoint)
        if self._configured:
            # The endpoint is user-supplied, so it is checked before any
            # client is pointed at it.
            # NOTE(review): presumably this raises on disallowed URLs —
            # confirm against api.utils.url_security.
            validate_user_configured_service_url(endpoint, field_name="endpoint")
            self.client = AsyncAzureOpenAI(
                api_key=api_key,
                azure_endpoint=endpoint,
                api_version=api_version,
            )
            logger.info(
                f"Azure OpenAI embedding service initialized with deployment: {model_id}"
            )
        else:
            self.client = None
            logger.warning(
                "Azure OpenAI embedding service initialized without credentials. "
                "Operations will fail until endpoint and API key are configured."
            )

    def get_model_id(self) -> str:
        """Return the deployment/model identifier this service was built with."""
        return self.model_id

    def get_embedding_dimension(self) -> int:
        """Return the embedding vector size reported by this service.

        NOTE(review): this is the module-level constant and does not vary
        with ``model_id`` — confirm it matches the configured deployment.
        """
        return EMBEDDING_DIMENSION

    def _ensure_configured(self) -> None:
        """Raise if the service was created without endpoint or API key.

        Raises:
            AzureEmbeddingAPIKeyNotConfiguredError: When no client is available.
        """
        if not self._configured or self.client is None:
            raise AzureEmbeddingAPIKeyNotConfiguredError()

    async def embed_texts(self, texts: List[str]) -> List[List[float]]:
        """Embed a batch of texts using Azure OpenAI API.

        Args:
            texts: Texts to embed, sent to the service as a single request.

        Returns:
            One embedding per entry of ``response.data``; an empty list when
            ``texts`` is empty.

        Raises:
            AzureEmbeddingAPIKeyNotConfiguredError: When credentials are missing.
            Exception: Any error from the embeddings call is logged and re-raised.
        """
        self._ensure_configured()

        # An empty batch has nothing to embed, so skip the network round
        # trip. The credential check above still runs first so that a
        # misconfigured service is reported even for empty input.
        if not texts:
            return []

        try:
            response = await self.client.embeddings.create(
                input=texts,
                model=self.model_id,
            )
            return [item.embedding for item in response.data]
        except Exception as e:
            # Log for operators, then let the caller decide how to recover.
            logger.error(f"Error generating Azure OpenAI embeddings: {e}")
            raise

    async def embed_query(self, query: str) -> List[float]:
        """Embed a single query text using Azure OpenAI API.

        Args:
            query: The text to embed.

        Returns:
            The embedding vector for ``query``.

        Raises:
            AzureEmbeddingAPIKeyNotConfiguredError: When credentials are missing.
        """
        self._ensure_configured()
        embeddings = await self.embed_texts([query])
        return embeddings[0]

    async def search_similar_chunks(
        self,
        query: str,
        organization_id: int,
        limit: int = 5,
        document_uuids: Optional[List[str]] = None,
    ) -> List[Dict[str, Any]]:
        """Search for similar chunks using vector similarity.

        Embeds ``query`` and forwards the vector, together with the other
        arguments and this service's ``model_id`` (as ``embedding_model``),
        to ``db.search_similar_chunks``.

        Args:
            query: Free-text search query.
            organization_id: Organization identifier passed to the DB search.
            limit: Maximum number of results requested from the DB search.
            document_uuids: Optional document identifiers passed to the DB search.

        Returns:
            Whatever ``db.search_similar_chunks`` returns.

        Raises:
            AzureEmbeddingAPIKeyNotConfiguredError: When credentials are missing.
        """
        # Fail fast before doing any work when credentials are missing.
        self._ensure_configured()
        query_embedding = await self.embed_query(query)
        return await self.db.search_similar_chunks(
            query_embedding=query_embedding,
            organization_id=organization_id,
            limit=limit,
            document_uuids=document_uuids,
            embedding_model=self.model_id,
        )
|
||||
Loading…
Add table
Add a link
Reference in a new issue