feat: knowledge base functionality for the voice agent (#120)

* feat: upload file and store embedding * feat: add documents in nodes * feat: add openai embedding service
2026-06-25 08:48:13 +02:00 · 2026-01-17 14:37:03 +05:30 · 2026-01-17 14:37:03 +05:30 · ef5b9e40a9
commit ef5b9e40a9
parent e2fa4bbb98
52 changed files with 4551 additions and 114 deletions
--- a/api/services/gen_ai/embedding/base.py
+++ b/api/services/gen_ai/embedding/base.py
@@ -0,0 +1,75 @@
+"""Base class for embedding services."""
+
+from abc import ABC, abstractmethod
+from typing import Any, Dict, List, Optional
+
+
+class BaseEmbeddingService(ABC):
+    """Abstract base class for embedding services.
+
+    All embedding services (SentenceTransformer, OpenAI, etc.) should inherit from this class
+    and implement every abstract method (the two getters are sync; embed/search are coroutines).
+    """
+
+    @abstractmethod
+    def get_model_id(self) -> str:
+        """Return the model identifier.
+
+        Returns:
+            String identifier for the model (e.g., 'sentence-transformers/all-MiniLM-L6-v2')
+        """
+        pass
+
+    @abstractmethod
+    def get_embedding_dimension(self) -> int:
+        """Return the embedding dimension.
+
+        Returns:
+            Integer dimension (number of components) of the embedding vectors
+        """
+        pass
+
+    @abstractmethod
+    async def embed_texts(self, texts: List[str]) -> List[List[float]]:
+        """Embed a batch of texts.
+
+        Args:
+            texts: List of text strings to embed
+
+        Returns:
+            List of embedding vectors, each a list of floats (presumably one per input, in order; TODO confirm)
+        """
+        pass
+
+    @abstractmethod
+    async def embed_query(self, query: str) -> List[float]:
+        """Embed a single query text.
+
+        Args:
+            query: Query text to embed
+
+        Returns:
+            Embedding vector as list of floats
+        """
+        pass
+
+    @abstractmethod
+    async def search_similar_chunks(
+        self,
+        query: str,
+        organization_id: int,
+        limit: int = 5,
+        document_uuids: Optional[List[str]] = None,
+    ) -> List[Dict[str, Any]]:
+        """Search for similar chunks using vector similarity.
+
+        Args:
+            query: Search query text
+            organization_id: Organization ID for scoping
+            limit: Maximum number of results to return (defaults to 5)
+            document_uuids: Optional document UUIDs to filter by; default None (presumably no filter; confirm)
+
+        Returns:
+            List of dictionaries containing chunk data and similarity scores
+        """
+        pass