Added local Speech-to-Text (STT) support using Faster-Whisper

2026-05-02 20:32:39 +02:00 · 2025-10-11 23:56:12 +05:00 · 2025-10-11 23:56:12 +05:00 · dad79674c8
commit dad79674c8
parent 402039f02f
8 changed files with 396 additions and 7 deletions
--- a/surfsense_web/components/stt/audio-recorder.tsx
+++ b/surfsense_web/components/stt/audio-recorder.tsx
@ -0,0 +1,109 @@
+"use client";
+
+import { useEffect, useRef, useState } from "react";
+import { Button } from "@/components/ui/button";
+import { Mic, Square, Upload } from "lucide-react";
+
+/** Props accepted by the `AudioRecorder` component. */
+interface AudioRecorderProps {
+  /**
+   * Base URL of the speech-to-text API. The component POSTs audio to
+   * `${apiUrl}/transcribe`. Defaults to `/api/v1/stt` when omitted.
+   */
+  apiUrl?: string;
+  /**
+   * Called with the `transcription` value taken from the server's JSON
+   * response after a recording or an uploaded file has been transcribed.
+   */
+  onTranscription: (text: string) => void;
+}
+
+/**
+ * Maps the container MIME type reported by `MediaRecorder` to a file
+ * extension, so the uploaded recording is labelled as what it really is.
+ * Codec parameters (e.g. `;codecs=opus`) are ignored.
+ */
+function extensionForMimeType(mimeType: string): string {
+  const essence = mimeType.split(";")[0]?.trim().toLowerCase() ?? "";
+  switch (essence) {
+    case "audio/ogg":
+      return "ogg";
+    case "audio/mp4":
+      // Conventional extension for audio-only MP4 containers.
+      return "m4a";
+    case "audio/wav":
+    case "audio/x-wav":
+    case "audio/wave":
+      return "wav";
+    default:
+      return "webm";
+  }
+}
+
+/**
+ * Microphone recorder and audio-file uploader that sends audio to the
+ * speech-to-text endpoint and reports the resulting text.
+ *
+ * Audio is POSTed as multipart form data (field `audio`) to
+ * `${apiUrl}/transcribe`; the response is expected to be JSON with a string
+ * `transcription` field, which is handed to `onTranscription`. Failures are
+ * logged to the console and otherwise leave the component usable.
+ *
+ * @param onTranscription - Receives the transcribed text on success.
+ * @param apiUrl - Base URL of the STT API (default `/api/v1/stt`).
+ */
+export function AudioRecorder({ onTranscription, apiUrl = "/api/v1/stt" }: AudioRecorderProps) {
+  const [isRecording, setIsRecording] = useState(false);
+  const [isTranscribing, setIsTranscribing] = useState(false);
+  const mediaRecorderRef = useRef<MediaRecorder | null>(null);
+  const streamRef = useRef<MediaStream | null>(null);
+  const chunksRef = useRef<Blob[]>([]);
+
+  /** Stops every track of the active microphone stream, if there is one. */
+  const releaseStream = () => {
+    streamRef.current?.getTracks().forEach((track) => track.stop());
+    streamRef.current = null;
+  };
+
+  // Release the microphone if the component unmounts while recording.
+  useEffect(() => {
+    return () => {
+      const recorder = mediaRecorderRef.current;
+      if (recorder && recorder.state !== "inactive") {
+        // Nobody is left to receive the text, so skip the upload.
+        recorder.onstop = null;
+        recorder.stop();
+      }
+      streamRef.current?.getTracks().forEach((track) => track.stop());
+      streamRef.current = null;
+    };
+  }, []);
+
+  /**
+   * Uploads `audioBlob` under `fileName` and forwards the transcription.
+   * Errors are logged; `isTranscribing` is always cleared afterwards.
+   */
+  const transcribeAudio = async (audioBlob: Blob, fileName: string) => {
+    setIsTranscribing(true);
+
+    const formData = new FormData();
+    formData.append("audio", audioBlob, fileName);
+
+    try {
+      const response = await fetch(`${apiUrl}/transcribe`, {
+        method: "POST",
+        body: formData,
+      });
+
+      if (!response.ok) throw new Error(`Transcription failed (HTTP ${response.status})`);
+
+      // Validate the payload instead of trusting its shape.
+      const result: unknown = await response.json();
+      const transcription =
+        typeof result === "object" && result !== null && "transcription" in result
+          ? result.transcription
+          : undefined;
+      if (typeof transcription !== "string") {
+        throw new Error("Transcription response did not contain text");
+      }
+
+      onTranscription(transcription);
+    } catch (error) {
+      console.error("Transcription error:", error);
+    } finally {
+      setIsTranscribing(false);
+    }
+  };
+
+  /** Requests microphone access and starts capturing audio. */
+  const startRecording = async () => {
+    try {
+      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
+      streamRef.current = stream;
+      const mediaRecorder = new MediaRecorder(stream);
+      mediaRecorderRef.current = mediaRecorder;
+      chunksRef.current = [];
+
+      mediaRecorder.ondataavailable = (event) => {
+        // Browsers may deliver zero-byte chunks; they carry no audio.
+        if (event.data.size > 0) chunksRef.current.push(event.data);
+      };
+
+      mediaRecorder.onstop = async () => {
+        // Free the microphone right away rather than after the network call.
+        releaseStream();
+
+        // Label the blob with the format the recorder actually produced.
+        const mimeType = mediaRecorder.mimeType || chunksRef.current[0]?.type || "audio/webm";
+        const audioBlob = new Blob(chunksRef.current, { type: mimeType });
+        chunksRef.current = [];
+        if (audioBlob.size === 0) return;
+
+        await transcribeAudio(audioBlob, `recording.${extensionForMimeType(mimeType)}`);
+      };
+
+      mediaRecorder.start();
+      setIsRecording(true);
+    } catch (error) {
+      // The stream may already be open if MediaRecorder setup failed.
+      releaseStream();
+      mediaRecorderRef.current = null;
+      console.error("Error starting recording:", error);
+    }
+  };
+
+  /** Stops the active recording; the recorder's `onstop` handles the upload. */
+  const stopRecording = () => {
+    const recorder = mediaRecorderRef.current;
+    if (recorder && recorder.state !== "inactive") {
+      recorder.stop();
+    }
+    setIsRecording(false);
+  };
+
+  /** Transcribes the audio file chosen through the hidden file input. */
+  const handleFileUpload = async (event: React.ChangeEvent<HTMLInputElement>) => {
+    const input = event.target;
+    const file = input.files?.[0];
+    // Reset so that choosing the same file again still fires `change`.
+    input.value = "";
+    if (!file) return;
+
+    await transcribeAudio(file, file.name || "recording.wav");
+  };
+
+  // Uploading while recording would disable the Stop button until it finishes.
+  const uploadDisabled = isTranscribing || isRecording;
+
+  return (
+    <div className="flex gap-2 items-center">
+      <Button
+        onClick={isRecording ? stopRecording : startRecording}
+        disabled={isTranscribing}
+        variant={isRecording ? "destructive" : "default"}
+        size="sm"
+      >
+        {isRecording ? <Square className="w-4 h-4" /> : <Mic className="w-4 h-4" />}
+        {isRecording ? "Stop" : "Record"}
+      </Button>
+
+      <label>
+        <Button variant="outline" size="sm" disabled={uploadDisabled} asChild>
+          <span>
+            <Upload className="w-4 h-4" />
+            Upload
+          </span>
+        </Button>
+        {/* Disable the real input too; the label would otherwise still open the picker. */}
+        <input
+          type="file"
+          accept="audio/*"
+          onChange={handleFileUpload}
+          disabled={uploadDisabled}
+          className="hidden"
+        />
+      </label>
+
+      {isTranscribing && <span className="text-sm text-muted-foreground">Transcribing...</span>}
+    </div>
+  );
+}