From 0a972ef66df10bfc34eac34f99334952a8682d74 Mon Sep 17 00:00:00 2001
From: Sabiha Khan
Date: Sat, 11 Apr 2026 18:20:25 +0530
Subject: [PATCH] fix: bake punkt_tab file into docker images

---
Notes (text in this section is ignored by `git am`):
  - nltk.download() returns False on failure instead of raising, so the
    builder step now passes raise_on_error=True to make a failed download
    abort the image build at the step that actually failed.
  - download_dir is passed explicitly so the builder stage writes to the
    same path that the runtime stage copies from (/root/nltk_data).
  - The post-image blob id on the "index" line predates this revision of
    the added lines; it is informational and not needed to apply the patch.

 api/Dockerfile | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/api/Dockerfile b/api/Dockerfile
index 37a64ef..09a8df7 100644
--- a/api/Dockerfile
+++ b/api/Dockerfile
@@ -26,6 +26,8 @@ RUN pip install --user --no-cache-dir -r requirements.txt && \
 # Copy and install pipecat from local submodule
 COPY pipecat /tmp/pipecat
 RUN pip install --user --no-cache-dir '/tmp/pipecat[cartesia,deepgram,openai,elevenlabs,groq,google,azure,sarvam,soundfile,silero,webrtc,local-smart-turn-v3,speechmatics,openrouter,camb]' && \
+    # Pre-download NLTK punkt_tab tokenizer data (required by pipecat at runtime); raise_on_error makes a failed download fail the build
+    python -c "import nltk; nltk.download('punkt_tab', download_dir='/root/nltk_data', quiet=True, raise_on_error=True)" && \
     # Clean up pip cache and temporary pipecat directory
     rm -rf /root/.cache/pip /tmp/pipecat
 
@@ -48,6 +50,9 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
 # Copy Python packages from builder stage
 COPY --from=builder /root/.local /root/.local
 
+# Copy NLTK data (punkt_tab tokenizer) downloaded to /root/nltk_data in the builder stage
+COPY --from=builder /root/nltk_data /root/nltk_data
+
 # Make sure scripts in .local are available
 ENV PATH=/root/.local/bin:$PATH
 