dograh/api/services/gen_ai/embedding/base.py
Abhishek ef5b9e40a9
feat: knowledge base functionality for the voice agent (#120)
* feat: upload file and store embedding

* feat: add documents in nodes

* feat: add openai embedding service
2026-01-17 14:37:03 +05:30

75 lines
2 KiB
Python

"""Base class for embedding services."""
from abc import ABC, abstractmethod
from typing import Any, Dict, List, Optional
class BaseEmbeddingService(ABC):
    """Common interface that every embedding backend has to satisfy.

    Concrete services (SentenceTransformer, OpenAI, etc.) subclass this and
    provide all of the abstract methods declared below. A subclass that
    leaves any of them unimplemented cannot be instantiated.
    """

    @abstractmethod
    def get_model_id(self) -> str:
        """Identify the underlying embedding model.

        Returns:
            The model's string identifier, for example
            'sentence-transformers/all-MiniLM-L6-v2'.
        """
        ...

    @abstractmethod
    def get_embedding_dimension(self) -> int:
        """Report the size of the vectors this service produces.

        Returns:
            The dimension of each embedding vector, as an integer.
        """
        ...

    @abstractmethod
    async def embed_texts(self, texts: List[str]) -> List[List[float]]:
        """Compute embeddings for several texts in one call.

        Args:
            texts: The text strings to embed.

        Returns:
            A list of embedding vectors, each one a list of floats.
        """
        ...

    @abstractmethod
    async def embed_query(self, query: str) -> List[float]:
        """Compute the embedding of one query string.

        Args:
            query: The query text to embed.

        Returns:
            The embedding vector as a list of floats.
        """
        ...

    @abstractmethod
    async def search_similar_chunks(
        self,
        query: str,
        organization_id: int,
        limit: int = 5,
        document_uuids: Optional[List[str]] = None,
    ) -> List[Dict[str, Any]]:
        """Find stored chunks that are similar to a query by vector similarity.

        Args:
            query: The search query text.
            organization_id: ID of the organization the search is scoped to.
            limit: Upper bound on the number of results returned.
            document_uuids: Document UUIDs to filter by; optional.

        Returns:
            A list of dictionaries holding chunk data and similarity scores.
        """
        ...