feat: add full document mode in knowledge base

2026-06-13 08:15:21 +02:00 · 2026-04-09 13:49:20 +05:30 · 2026-04-09 13:49:20 +05:30 · 87c8c5e2c8
commit 87c8c5e2c8
parent c085398933
26 changed files with 1144 additions and 351 deletions
--- a/api/services/pipecat/audio_file_cache.py
+++ b/api/services/pipecat/audio_file_cache.py
@ -0,0 +1,220 @@
+"""Shared utilities for downloading, converting, and caching audio files.
+
+Provides helpers used by both the recording audio cache and the ambient
+noise cache to avoid duplicating download / ffmpeg / disk-cache logic.
+"""
+
+import asyncio
+import os
+import shutil
+import tempfile
+from typing import Literal, Optional
+
+from loguru import logger
+
+from api.constants import APP_ROOT_DIR
+
+# ---------------------------------------------------------------------------
+# Filesystem cache directory (shared by all audio caches)
+# ---------------------------------------------------------------------------
+
+# The cache directory sits beside the application root (same parent directory).
+CACHE_DIR = os.path.join(os.path.dirname(APP_ROOT_DIR), "dograh_pcm_cache")
+# Created at import time so cache readers/writers can assume it exists.
+os.makedirs(CACHE_DIR, exist_ok=True)
+
+
+# ---------------------------------------------------------------------------
+# Download helper
+# ---------------------------------------------------------------------------
+
+
+async def download_storage_file(
+    storage_key: str,
+    storage_backend: str,
+    get_storage_fn,
+) -> Optional[str]:
+    """Fetch an object from storage into a freshly created local temp file.
+
+    Args:
+        storage_key: Key of the object to download; its extension (``.wav``
+            when the key has none) becomes the temp file's suffix.
+        storage_backend: Backend identifier handed to *get_storage_fn*.
+        get_storage_fn: Callable returning a storage client for a backend.
+            The client must expose an awaitable
+            ``adownload_file(key, local_path)`` whose result is truthy on
+            success.
+
+    Returns:
+        Path of the temp file on success (the caller must delete it), or
+        None on failure, in which case the temp file has already been removed.
+    """
+    suffix = ext_from_key(storage_key)
+    handle, local_path = tempfile.mkstemp(suffix=suffix, prefix="dograh_dl_")
+    # The path, not the descriptor, is what gets passed to the storage client.
+    os.close(handle)
+
+    try:
+        backend = get_storage_fn(storage_backend)
+        if await backend.adownload_file(storage_key, local_path):
+            return local_path
+        logger.error(f"Failed to download {storage_key}")
+    except Exception:
+        logger.exception(f"Error downloading {storage_key}")
+
+    # Both failure paths end up here: discard the unusable temp file.
+    _safe_unlink(local_path)
+    return None
+
+
+# ---------------------------------------------------------------------------
+# Audio conversion via ffmpeg
+# ---------------------------------------------------------------------------
+
+
+async def convert_audio_file(
+    file_path: str,
+    target_sample_rate: int,
+    output_format: Literal["pcm", "wav"] = "pcm",
+) -> Optional[bytes]:
+    """Convert an audio file to 16-bit mono audio via ffmpeg.
+
+    ffmpeg writes the result to its stdout, which is collected in full, so
+    the whole converted file is held in memory.
+
+    Args:
+        file_path: Path to the source audio file.
+        target_sample_rate: Desired output sample rate.
+        output_format: ``"pcm"`` for raw s16le bytes, ``"wav"`` for a
+            complete WAV file (16-bit mono). Any value other than ``"pcm"``
+            is treated as ``"wav"``.
+
+    Returns:
+        Converted audio bytes, or None on failure (ffmpeg not on PATH,
+        non-zero exit status, empty output, or an error running the process).
+    """
+    ffmpeg = shutil.which("ffmpeg")
+    if not ffmpeg:
+        logger.error("ffmpeg not found on PATH - cannot convert audio")
+        return None
+
+    if output_format == "pcm":
+        fmt_args = ["-f", "s16le", "-acodec", "pcm_s16le"]
+    else:
+        # NOTE(review): the WAV is streamed to a pipe, so ffmpeg presumably
+        # cannot seek back to finalize the header sizes - confirm that the
+        # consumers of this file accept such a header.
+        fmt_args = ["-f", "wav", "-acodec", "pcm_s16le"]
+
+    cmd = [
+        ffmpeg,
+        "-i",
+        file_path,
+        *fmt_args,
+        "-ac",
+        "1",
+        "-ar",
+        str(target_sample_rate),
+        "-loglevel",
+        "error",
+        "pipe:1",
+    ]
+
+    try:
+        proc = await asyncio.create_subprocess_exec(
+            *cmd,
+            # Do not let ffmpeg inherit (and possibly read from) our stdin.
+            stdin=asyncio.subprocess.DEVNULL,
+            stdout=asyncio.subprocess.PIPE,
+            stderr=asyncio.subprocess.PIPE,
+        )
+        stdout, stderr = await proc.communicate()
+
+        if proc.returncode != 0:
+            # errors="replace": ffmpeg may echo bytes that are not valid
+            # UTF-8; a decode failure here would hide the real diagnostic
+            # behind the generic handler below.
+            details = stderr.decode(errors="replace")
+            logger.error(f"ffmpeg failed (rc={proc.returncode}): {details}")
+            return None
+        if not stdout:
+            logger.error("ffmpeg produced no output")
+            return None
+
+        return stdout
+    except Exception:
+        logger.exception("ffmpeg subprocess error")
+        return None
+
+
+# ---------------------------------------------------------------------------
+# File I/O helpers
+# ---------------------------------------------------------------------------
+
+
+def read_cached_file(path: str) -> bytes:
+    """Return the full contents of the file at *path* as bytes."""
+    with open(path, "rb") as cached:
+        contents = cached.read()
+    return contents
+
+
+def write_cache_file(path: str, data: bytes) -> None:
+    """Atomically write *data* to *path* (write-to-tmp then rename).
+
+    The temporary file is created in the destination's own directory so the
+    final ``os.replace`` never has to cross a filesystem boundary. If the
+    write or the rename fails, the temporary file is removed again so that
+    aborted writes do not pile up as stray ``*.tmp`` files.
+
+    Args:
+        path: Final location of the cache entry.
+        data: Bytes to store.
+
+    Raises:
+        OSError: If the temporary file cannot be created, written or renamed.
+    """
+    target_dir = os.path.dirname(os.path.abspath(path))
+    fd, tmp = tempfile.mkstemp(dir=target_dir, suffix=".tmp")
+    try:
+        # fdopen takes ownership of fd, so it is closed exactly once and the
+        # file is not reopened by name.
+        with os.fdopen(fd, "wb") as f:
+            f.write(data)
+        os.replace(tmp, path)
+    except BaseException:
+        # Clean up on any failure (including cancellation), then re-raise.
+        try:
+            os.unlink(tmp)
+        except OSError:
+            pass
+        raise
+
+
+def ext_from_key(storage_key: str) -> str:
+    """Return the extension of *storage_key* (dot included), or ``.wav`` if it has none."""
+    suffix = os.path.splitext(storage_key)[1]
+    return suffix or ".wav"
+
+
+def _safe_unlink(path: str) -> None:
+    """Best-effort removal of the file at *path*.
+
+    Filesystem errors are swallowed on purpose: this is used for temp-file
+    cleanup, where a failure to delete must not mask the caller's result.
+    """
+    try:
+        # Attempt the removal directly instead of checking existence first;
+        # a file that is already gone raises FileNotFoundError (an OSError).
+        os.unlink(path)
+    except OSError:
+        pass
+
+
+# ---------------------------------------------------------------------------
+# Ambient noise file cache
+# ---------------------------------------------------------------------------
+
+
+def _ambient_noise_cache_path(storage_key: str, sample_rate: int) -> str:
+    """Build the cache file path for an ambient noise WAV.
+
+    The file name combines the first 16 hex characters of the SHA-256 of
+    *storage_key* with *sample_rate*, so each upload and each sample rate
+    gets its own cache entry.
+    """
+    import hashlib
+
+    digest = hashlib.sha256(storage_key.encode())
+    key_hash = digest.hexdigest()[:16]
+    filename = f"ambient_{key_hash}_{sample_rate}.wav"
+    return os.path.join(CACHE_DIR, filename)
+
+
+async def get_cached_ambient_noise_path(
+    storage_key: str,
+    storage_backend: str,
+    target_sample_rate: int,
+) -> Optional[str]:
+    """Return a local WAV file path for a custom ambient noise file.
+
+    Downloads from object storage and converts to mono WAV at
+    *target_sample_rate* on the first call; subsequent calls return the
+    cached path immediately. The downloaded temp file is always removed,
+    whether or not the conversion succeeds.
+
+    Args:
+        storage_key: Object storage key for the uploaded audio file.
+        storage_backend: Storage backend identifier (e.g. ``"minio"``, ``"s3"``).
+        target_sample_rate: Target sample rate for the output WAV.
+
+    Returns:
+        Path to the cached WAV file, or None on failure (download failed,
+        conversion failed, or the cache file could not be written).
+    """
+    # NOTE(review): imported locally, presumably to avoid an import cycle
+    # with the storage package - confirm before moving it to module level.
+    from api.services.storage import get_storage_for_backend
+
+    cached = _ambient_noise_cache_path(storage_key, target_sample_rate)
+    if os.path.exists(cached):
+        logger.debug(f"Ambient noise served from cache: {cached}")
+        return cached
+
+    logger.info(f"Downloading custom ambient noise: {storage_key}")
+
+    tmp_path = await download_storage_file(
+        storage_key, storage_backend, get_storage_for_backend
+    )
+    if not tmp_path:
+        return None
+
+    try:
+        wav_data = await convert_audio_file(
+            tmp_path, target_sample_rate, output_format="wav"
+        )
+        if wav_data is None:
+            return None
+
+        # The cache write is blocking file I/O; run it in a worker thread so
+        # the event loop stays responsive while the file is written.
+        await asyncio.to_thread(write_cache_file, cached, wav_data)
+        logger.info(f"Cached custom ambient noise: {cached} ({len(wav_data)} bytes)")
+        return cached
+    except Exception:
+        logger.exception("Error caching ambient noise file")
+        return None
+    finally:
+        _safe_unlink(tmp_path)
--- a/api/services/pipecat/recording_audio_cache.py
+++ b/api/services/pipecat/recording_audio_cache.py
@ -6,29 +6,30 @@ leading/trailing silence, and caches the processed bytes on disk so
 subsequent plays (even from other workers) are instantaneous.
 """

-import asyncio
 import os
-import shutil
-import tempfile
 from typing import Awaitable, Callable, Optional

 import numpy as np
 from loguru import logger

-from api.constants import APP_ROOT_DIR
 from pipecat.audio.utils import SPEAKING_THRESHOLD

-# ---------------------------------------------------------------------------
-# Filesystem cache directory
-# ---------------------------------------------------------------------------
+from .audio_file_cache import (
+    CACHE_DIR,
+    convert_audio_file,
+    download_storage_file,
+    read_cached_file,
+    write_cache_file,
+)

-_CACHE_DIR = os.path.join(os.path.dirname(APP_ROOT_DIR), "dograh_pcm_cache")
-os.makedirs(_CACHE_DIR, exist_ok=True)
+# ---------------------------------------------------------------------------
+# Cache path helper
+# ---------------------------------------------------------------------------


 def _cache_path(recording_id: str, sample_rate: int) -> str:
    """Return the on-disk path for a cached PCM file."""
-    return os.path.join(_CACHE_DIR, f"{recording_id}_{sample_rate}.pcm")
+    return os.path.join(CACHE_DIR, f"{recording_id}_{sample_rate}.pcm")


 # ---------------------------------------------------------------------------
@ -72,7 +73,7 @@ def create_recording_audio_fetcher(
        # 1. Serve from filesystem cache
        if os.path.exists(cached):
            logger.debug(f"Recording {recording_id} served from disk cache")
-            return _read_file(cached)
+            return read_cached_file(cached)

        # 2. DB lookup
        recording = await db_client.get_recording_by_recording_id(
@ -172,109 +173,33 @@ async def _download_and_convert(

    Returns the processed PCM bytes, or None on failure.
    """
-    ext = _ext_from_key(recording.storage_key)
-    fd, tmp_path = tempfile.mkstemp(
-        suffix=ext, prefix=f"dograh_dl_{recording.recording_id}_"
+    tmp_path = await download_storage_file(
+        recording.storage_key, recording.storage_backend, get_storage_fn
    )
-    os.close(fd)
-    try:
-        storage = get_storage_fn(recording.storage_backend)
-        success = await storage.adownload_file(recording.storage_key, tmp_path)
-        if not success:
-            logger.error(f"Failed to download recording {recording.recording_id}")
-            return None
+    if not tmp_path:
+        return None

-        pcm_data = await _audio_file_to_pcm(tmp_path, sample_rate)
+    try:
+        pcm_data = await convert_audio_file(tmp_path, sample_rate, output_format="pcm")
        if pcm_data is None:
            return None

        pcm_data = _trim_silence(pcm_data, sample_rate)

-        # Write to disk cache atomically (write to tmp then rename)
+        # Write to disk cache
        cached = _cache_path(recording.recording_id, sample_rate)
-        fd, tmp_cache = tempfile.mkstemp(dir=_CACHE_DIR, suffix=".pcm.tmp")
-        os.close(fd)
-        _write_file(tmp_cache, pcm_data)
-        os.replace(tmp_cache, cached)
+        write_cache_file(cached, pcm_data)

        return pcm_data
    except Exception:
        logger.exception(f"Error fetching recording {recording.recording_id}")
        return None
    finally:
-        if os.path.exists(tmp_path):
-            try:
+        try:
+            if os.path.exists(tmp_path):
                os.unlink(tmp_path)
-            except OSError:
-                pass
-
-
-# ---------------------------------------------------------------------------
-# File I/O helpers (run via asyncio.to_thread)
-# ---------------------------------------------------------------------------
-
-
-def _read_file(path: str) -> bytes:
-    with open(path, "rb") as f:
-        return f.read()
-
-
-def _write_file(path: str, data: bytes) -> None:
-    with open(path, "wb") as f:
-        f.write(data)
-
-
-# ---------------------------------------------------------------------------
-# Audio conversion
-# ---------------------------------------------------------------------------
-
-
-async def _audio_file_to_pcm(
-    file_path: str, target_sample_rate: int
-) -> Optional[bytes]:
-    """Convert an audio file to raw 16-bit mono PCM bytes via ffmpeg."""
-    ffmpeg = shutil.which("ffmpeg")
-    if not ffmpeg:
-        logger.error("ffmpeg not found on PATH — cannot decode recording")
-        return None
-
-    cmd = [
-        ffmpeg,
-        "-i",
-        file_path,
-        "-f",
-        "s16le",  # raw 16-bit signed little-endian PCM
-        "-acodec",
-        "pcm_s16le",
-        "-ac",
-        "1",  # mono
-        "-ar",
-        str(target_sample_rate),
-        "-loglevel",
-        "error",
-        "pipe:1",  # output to stdout
-    ]
-
-    try:
-        proc = await asyncio.create_subprocess_exec(
-            *cmd,
-            stdout=asyncio.subprocess.PIPE,
-            stderr=asyncio.subprocess.PIPE,
-        )
-        stdout, stderr = await proc.communicate()
-
-        if proc.returncode != 0:
-            logger.error(f"ffmpeg failed (rc={proc.returncode}): {stderr.decode()}")
-            return None
-
-        if not stdout:
-            logger.error("ffmpeg produced no output")
-            return None
-
-        return stdout
-    except Exception:
-        logger.exception("ffmpeg subprocess error")
-        return None
+        except OSError:
+            pass


 # ---------------------------------------------------------------------------
@ -327,14 +252,3 @@ def _trim_silence(pcm_data: bytes, sample_rate: int) -> bytes:
        )

    return trimmed.tobytes()
-
-
-# ---------------------------------------------------------------------------
-# Helpers
-# ---------------------------------------------------------------------------
-
-
-def _ext_from_key(storage_key: str) -> str:
-    """Extract file extension from a storage key, defaulting to .wav."""
-    _, ext = os.path.splitext(storage_key)
-    return ext if ext else ".wav"
--- a/api/services/pipecat/run_pipeline.py
+++ b/api/services/pipecat/run_pipeline.py
@ -510,7 +510,7 @@ async def run_pipeline_smallwebrtc(
    # Create audio configuration for WebRTC
    audio_config = create_audio_config(WorkflowRunMode.SMALLWEBRTC.value)

-    transport = create_webrtc_transport(
+    transport = await create_webrtc_transport(
        webrtc_connection,
        workflow_run_id,
        audio_config,
--- a/api/services/pipecat/transport_setup.py
+++ b/api/services/pipecat/transport_setup.py
@ -1,11 +1,13 @@
 import os

 from fastapi import WebSocket
+from loguru import logger

 from api.constants import APP_ROOT_DIR
 from api.db import db_client
 from api.enums import OrganizationConfigurationKey
 from api.services.pipecat.audio_config import AudioConfig
+from api.services.pipecat.audio_file_cache import get_cached_ambient_noise_path
 from api.services.telephony.providers.ari_call_strategies import (
    ARIBridgeSwapStrategy,
    ARIHangupStrategy,
@ -37,6 +39,49 @@ librnnoise_path = os.path.normpath(
 )


+async def _build_audio_out_mixer(
+    audio_out_sample_rate: int,
+    ambient_noise_config: dict | None,
+):
+    """Pick the output mixer for a transport from its ambient-noise settings.
+
+    * Config missing, empty, or not ``enabled`` -> ``SilenceAudioMixer``.
+    * ``storage_key`` and ``storage_backend`` both set and the file can be
+      fetched/cached -> ``SoundfileMixer`` playing that custom file.
+    * Otherwise -> ``SoundfileMixer`` playing the bundled office ambience
+      for *audio_out_sample_rate*.
+
+    The mixer volume comes from the config's ``volume`` key (default 0.3).
+    """
+    if not ambient_noise_config or not ambient_noise_config.get("enabled", False):
+        return SilenceAudioMixer()
+
+    volume = ambient_noise_config.get("volume", 0.3)
+    storage_key = ambient_noise_config.get("storage_key")
+    storage_backend = ambient_noise_config.get("storage_backend")
+
+    # Try the user-uploaded file first; None means "use the built-in sound".
+    custom_path = None
+    if storage_key and storage_backend:
+        custom_path = await get_cached_ambient_noise_path(
+            storage_key, storage_backend, audio_out_sample_rate
+        )
+        if not custom_path:
+            logger.warning(
+                "Custom ambient noise file unavailable, falling back to default"
+            )
+
+    if custom_path:
+        sound_name = "custom"
+        sound_file = custom_path
+    else:
+        sound_name = "office"
+        sound_file = (
+            APP_ROOT_DIR
+            / "assets"
+            / f"office-ambience-{audio_out_sample_rate}-mono.wav"
+        )
+
+    return SoundfileMixer(
+        sound_files={sound_name: sound_file},
+        default_sound=sound_name,
+        volume=volume,
+    )
+
+
 async def create_twilio_transport(
    websocket_client: WebSocket,
    stream_sid: str,
@ -79,6 +124,10 @@ async def create_twilio_transport(
        hangup_strategy=hangup_strategy,
    )

+    mixer = await _build_audio_out_mixer(
+        audio_config.transport_out_sample_rate, ambient_noise_config
+    )
+
    return FastAPIWebsocketTransport(
        websocket=websocket_client,
        params=FastAPIWebsocketParams(
@ -86,19 +135,7 @@ async def create_twilio_transport(
            audio_out_enabled=True,
            audio_in_sample_rate=audio_config.transport_in_sample_rate,
            audio_out_sample_rate=audio_config.transport_out_sample_rate,
-            audio_out_mixer=(
-                SoundfileMixer(
-                    sound_files={
-                        "office": APP_ROOT_DIR
-                        / "assets"
-                        / f"office-ambience-{audio_config.transport_out_sample_rate}-mono.wav"
-                    },
-                    default_sound="office",
-                    volume=ambient_noise_config.get("volume", 0.3),
-                )
-                if ambient_noise_config and ambient_noise_config.get("enabled", False)
-                else SilenceAudioMixer()
-            ),
+            audio_out_mixer=mixer,
            serializer=serializer,
        ),
    )
@ -144,6 +181,10 @@ async def create_cloudonix_transport(
        hangup_strategy=hangup_strategy,
    )

+    mixer = await _build_audio_out_mixer(
+        audio_config.transport_out_sample_rate, ambient_noise_config
+    )
+
    return FastAPIWebsocketTransport(
        websocket=websocket_client,
        params=FastAPIWebsocketParams(
@ -151,19 +192,7 @@ async def create_cloudonix_transport(
            audio_out_enabled=True,
            audio_in_sample_rate=audio_config.transport_in_sample_rate,
            audio_out_sample_rate=audio_config.transport_out_sample_rate,
-            audio_out_mixer=(
-                SoundfileMixer(
-                    sound_files={
-                        "office": APP_ROOT_DIR
-                        / "assets"
-                        / f"office-ambience-{audio_config.transport_out_sample_rate}-mono.wav"
-                    },
-                    default_sound="office",
-                    volume=ambient_noise_config.get("volume", 0.3),
-                )
-                if ambient_noise_config and ambient_noise_config.get("enabled", False)
-                else SilenceAudioMixer()
-            ),
+            audio_out_mixer=mixer,
            serializer=serializer,
            audio_out_10ms_chunks=2,
        ),
@ -209,6 +238,10 @@ async def create_telnyx_transport(
        inbound_encoding="PCMU",
    )

+    mixer = await _build_audio_out_mixer(
+        audio_config.transport_out_sample_rate, ambient_noise_config
+    )
+
    return FastAPIWebsocketTransport(
        websocket=websocket_client,
        params=FastAPIWebsocketParams(
@ -216,19 +249,7 @@ async def create_telnyx_transport(
            audio_out_enabled=True,
            audio_in_sample_rate=audio_config.transport_in_sample_rate,
            audio_out_sample_rate=audio_config.transport_out_sample_rate,
-            audio_out_mixer=(
-                SoundfileMixer(
-                    sound_files={
-                        "office": APP_ROOT_DIR
-                        / "assets"
-                        / f"office-ambience-{audio_config.transport_out_sample_rate}-mono.wav"
-                    },
-                    default_sound="office",
-                    volume=ambient_noise_config.get("volume", 0.3),
-                )
-                if ambient_noise_config and ambient_noise_config.get("enabled", False)
-                else SilenceAudioMixer()
-            ),
+            audio_out_mixer=mixer,
            serializer=serializer,
        ),
    )
@ -278,6 +299,10 @@ async def create_ari_transport(
        ),
    )

+    mixer = await _build_audio_out_mixer(
+        audio_config.transport_out_sample_rate, ambient_noise_config
+    )
+
    return FastAPIWebsocketTransport(
        websocket=websocket_client,
        params=FastAPIWebsocketParams(
@ -285,19 +310,7 @@ async def create_ari_transport(
            audio_out_enabled=True,
            audio_in_sample_rate=audio_config.transport_in_sample_rate,
            audio_out_sample_rate=audio_config.transport_out_sample_rate,
-            audio_out_mixer=(
-                SoundfileMixer(
-                    sound_files={
-                        "office": APP_ROOT_DIR
-                        / "assets"
-                        / f"office-ambience-{audio_config.transport_out_sample_rate}-mono.wav"
-                    },
-                    default_sound="office",
-                    volume=ambient_noise_config.get("volume", 0.3),
-                )
-                if ambient_noise_config and ambient_noise_config.get("enabled", False)
-                else SilenceAudioMixer()
-            ),
+            audio_out_mixer=mixer,
            serializer=serializer,
        ),
    )
@ -340,6 +353,10 @@ async def create_vonage_transport(
        ),
    )

+    mixer = await _build_audio_out_mixer(
+        audio_config.transport_out_sample_rate, ambient_noise_config
+    )
+
    # Important: Vonage uses binary WebSocket mode, not text
    return FastAPIWebsocketTransport(
        websocket=websocket_client,
@ -348,19 +365,7 @@ async def create_vonage_transport(
            audio_out_enabled=True,
            audio_in_sample_rate=audio_config.transport_in_sample_rate,
            audio_out_sample_rate=audio_config.transport_out_sample_rate,
-            audio_out_mixer=(
-                SoundfileMixer(
-                    sound_files={
-                        "office": APP_ROOT_DIR
-                        / "assets"
-                        / f"office-ambience-{audio_config.transport_out_sample_rate}-mono.wav"
-                    },
-                    default_sound="office",
-                    volume=ambient_noise_config.get("volume", 0.3),
-                )
-                if ambient_noise_config and ambient_noise_config.get("enabled", False)
-                else SilenceAudioMixer()
-            ),
+            audio_out_mixer=mixer,
            serializer=serializer,
        ),
    )
@ -428,6 +433,10 @@ async def create_vobiz_transport(
        f"transport_rate=8000Hz, pipeline_rate={audio_config.pipeline_sample_rate}Hz"
    )

+    mixer = await _build_audio_out_mixer(
+        audio_config.transport_out_sample_rate, ambient_noise_config
+    )
+
    # Create WebSocket transport (same structure as Twilio/Vonage)
    transport = FastAPIWebsocketTransport(
        websocket=websocket_client,
@ -436,19 +445,7 @@ async def create_vobiz_transport(
            audio_out_enabled=True,
            audio_in_sample_rate=audio_config.transport_in_sample_rate,
            audio_out_sample_rate=audio_config.transport_out_sample_rate,
-            audio_out_mixer=(
-                SoundfileMixer(
-                    sound_files={
-                        "office": APP_ROOT_DIR
-                        / "assets"
-                        / f"office-ambience-{audio_config.transport_out_sample_rate}-mono.wav"
-                    },
-                    default_sound="office",
-                    volume=ambient_noise_config.get("volume", 0.3),
-                )
-                if ambient_noise_config and ambient_noise_config.get("enabled", False)
-                else SilenceAudioMixer()
-            ),
+            audio_out_mixer=mixer,
            serializer=serializer,
        ),
    )
@ -459,7 +456,7 @@ async def create_vobiz_transport(
    return transport


-def create_webrtc_transport(
+async def create_webrtc_transport(
    webrtc_connection: SmallWebRTCConnection,
    workflow_run_id: int,
    audio_config: AudioConfig,
@ -468,6 +465,10 @@ def create_webrtc_transport(
 ):
    """Create a transport for WebRTC connections"""

+    mixer = await _build_audio_out_mixer(
+        audio_config.transport_out_sample_rate, ambient_noise_config
+    )
+
    return SmallWebRTCTransport(
        webrtc_connection=webrtc_connection,
        params=TransportParams(
@ -475,19 +476,7 @@ def create_webrtc_transport(
            audio_out_enabled=True,
            audio_in_sample_rate=audio_config.transport_in_sample_rate,
            audio_out_sample_rate=audio_config.transport_out_sample_rate,
-            audio_out_mixer=(
-                SoundfileMixer(
-                    sound_files={
-                        "office": APP_ROOT_DIR
-                        / "assets"
-                        / f"office-ambience-{audio_config.transport_out_sample_rate}-mono.wav"
-                    },
-                    default_sound="office",
-                    volume=ambient_noise_config.get("volume", 0.3),
-                )
-                if ambient_noise_config and ambient_noise_config.get("enabled", False)
-                else SilenceAudioMixer()
-            ),
+            audio_out_mixer=mixer,
        ),
    )

--- a/api/services/workflow/pipecat_engine.py
+++ b/api/services/workflow/pipecat_engine.py
@ -301,12 +301,6 @@ class PipecatEngine:
                        "Organization ID not available for knowledge base retrieval"
                    )

-                if not self._embeddings_api_key:
-                    raise ValueError(
-                        "Embeddings API key not configured. Please set your API key in "
-                        "Model Configurations > Embedding."
-                    )
-
                result = await retrieve_from_knowledge_base(
                    query=query,
                    organization_id=organization_id,
--- a/api/services/workflow/tools/knowledge_base.py
+++ b/api/services/workflow/tools/knowledge_base.py
@ -204,37 +204,66 @@ async def _perform_retrieval(
    """Internal function to perform the actual retrieval operation.

    Separated from tracing logic for cleaner code organization.
-    Uses OpenAI embeddings by default for high-quality retrieval.
+    Handles both chunked (vector search) and full_document (full text) modes.
    """
    try:
-        # Create a new embedding service instance
-        # Uses OpenAI text-embedding-3-small by default, or user-provided config
-        embedding_service = OpenAIEmbeddingService(
-            db_client=db_client,
-            max_tokens=128,  # This is only used for chunking, not for retrieval
-            api_key=embeddings_api_key,
-            model_id=embeddings_model or "text-embedding-3-small",
-            base_url=embeddings_base_url,
-        )
-
-        # Perform vector similarity search
-        results = await embedding_service.search_similar_chunks(
-            query=query,
-            organization_id=organization_id,
-            limit=limit,
-            document_uuids=document_uuids,
-        )
-
-        # Format results for LLM consumption
        chunks = []
-        for result in results:
-            chunk_info = {
-                "text": result.get("contextualized_text") or result.get("chunk_text"),
-                "filename": result.get("filename"),
-                "similarity": round(result.get("similarity", 0), 4),
-                "chunk_index": result.get("chunk_index"),
-            }
-            chunks.append(chunk_info)
+
+        # Check for full_document mode documents and return their full text
+        if document_uuids:
+            full_text_docs = await db_client.get_full_text_documents(
+                organization_id=organization_id,
+                document_uuids=document_uuids,
+            )
+            for doc in full_text_docs:
+                if doc.full_text:
+                    chunks.append(
+                        {
+                            "text": doc.full_text,
+                            "filename": doc.filename,
+                            "similarity": 1.0,
+                            "chunk_index": 0,
+                        }
+                    )
+
+            # Filter out full_document UUIDs so vector search only hits chunked docs
+            full_doc_uuids = {doc.document_uuid for doc in full_text_docs}
+            chunked_uuids = [u for u in document_uuids if u not in full_doc_uuids]
+        else:
+            chunked_uuids = document_uuids
+
+        # Perform vector similarity search on chunked documents
+        if chunked_uuids is None or len(chunked_uuids) > 0:
+            if not embeddings_api_key:
+                raise ValueError(
+                    "Embeddings API key not configured. Please set your API key in "
+                    "Model Configurations > Embedding."
+                )
+
+            embedding_service = OpenAIEmbeddingService(
+                db_client=db_client,
+                max_tokens=128,
+                api_key=embeddings_api_key,
+                model_id=embeddings_model or "text-embedding-3-small",
+                base_url=embeddings_base_url,
+            )
+
+            results = await embedding_service.search_similar_chunks(
+                query=query,
+                organization_id=organization_id,
+                limit=limit,
+                document_uuids=chunked_uuids if chunked_uuids else None,
+            )
+
+            for result in results:
+                chunk_info = {
+                    "text": result.get("contextualized_text")
+                    or result.get("chunk_text"),
+                    "filename": result.get("filename"),
+                    "similarity": round(result.get("similarity", 0), 4),
+                    "chunk_index": result.get("chunk_index"),
+                }
+                chunks.append(chunk_info)

        logger.info(
            f"Knowledge base retrieval: query='{query}', "