dograh/ui/src/components/VoiceSelectorModal.tsx

"use client";

import { Check, ChevronDown, Loader2, Pencil, Play, Square } from "lucide-react";
import { useCallback, useEffect, useMemo, useRef, useState } from "react";

import { getVoicesApiV1UserConfigurationsVoicesProviderGet } from "@/client/sdk.gen";
import { VoiceInfo } from "@/client/types.gen";
import { Button } from "@/components/ui/button";
import {
    Dialog,
    DialogContent,
    DialogHeader,
    DialogTitle,
} from "@/components/ui/dialog";
import { Input } from "@/components/ui/input";
import { Label } from "@/components/ui/label";
import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from "@/components/ui/select";
import { ACCENT_DISPLAY_NAMES } from "@/constants/accents";
import { LANGUAGE_DISPLAY_NAMES } from "@/constants/languages";
import { cn } from "@/lib/utils";

const ALL_FILTER_VALUE = "__all__";

// Defaults so the modal opens on a focused set instead of the full catalog.
const DEFAULT_GENDER = "female";
const DEFAULT_ACCENT = "us"; // American
const DEFAULT_LANGUAGE = "en";

const SEARCH_DEBOUNCE_MS = 300;

interface Facets {
    genders: string[];
    accents: string[];
    languages: string[];
}

const EMPTY_FACETS: Facets = { genders: [], accents: [], languages: [] };

interface VoiceSelectorModalProps {
    provider: string;
    value: string;
    onChange: (voiceId: string) => void;
    /** Optional model passed through to the voice catalog query. */
    model?: string;
    /** Allow typing a raw voice ID for voices outside the catalog. */
    allowManualInput?: boolean;
    className?: string;
}

const capitalize = (value: string) => value.charAt(0).toUpperCase() + value.slice(1);

const accentLabel = (code?: string | null) =>
    code ? ACCENT_DISPLAY_NAMES[code.toLowerCase()] || capitalize(code) : "";
const languageLabel = (code?: string | null) =>
    code ? LANGUAGE_DISPLAY_NAMES[code] || code.toUpperCase() : "";
const genderLabel = (gender?: string | null) => (gender ? capitalize(gender) : "");

/** Build the "Accent · Gender · Language" trait line shown under a voice name. */
function voiceTraits(voice: VoiceInfo): string {
    return [accentLabel(voice.accent), genderLabel(voice.gender), languageLabel(voice.language)]
        .filter(Boolean)
        .join(" · ");
}

/** Ensure the active filter value is always an option so the Select can render it. */
function withSelected(options: string[], selected: string): string[] {
    if (selected === ALL_FILTER_VALUE || options.includes(selected)) return options;
    return [selected, ...options];
}

export const VoiceSelectorModal: React.FC<VoiceSelectorModalProps> = ({
    provider,
    value,
    onChange,
    model,
    allowManualInput = false,
    className,
}) => {
    const [isOpen, setIsOpen] = useState(false);
    const [voices, setVoices] = useState<VoiceInfo[]>([]);
    const [facets, setFacets] = useState<Facets>(EMPTY_FACETS);
    const [isLoading, setIsLoading] = useState(false);
    const [error, setError] = useState<string | null>(null);

    // Filters drive a server-side query (we never fetch the whole catalog).
    const [gender, setGender] = useState(DEFAULT_GENDER);
    const [accent, setAccent] = useState(DEFAULT_ACCENT);
    const [language, setLanguage] = useState(DEFAULT_LANGUAGE);
    const [searchInput, setSearchInput] = useState("");
    const [debouncedSearch, setDebouncedSearch] = useState("");

    // Pending (in-modal) selection; only committed via "Use this voice".
    const [pendingVoiceId, setPendingVoiceId] = useState(value);
    const [selectedVoiceInfo, setSelectedVoiceInfo] = useState<VoiceInfo | null>(null);
    const [manualMode, setManualMode] = useState(false);
    const [manualVoiceId, setManualVoiceId] = useState("");

    // Preview playback.
    const [playingVoiceId, setPlayingVoiceId] = useState<string | null>(null);
    const audioRef = useRef<HTMLAudioElement | null>(null);
    const requestId = useRef(0);

    const stopPreview = useCallback(() => {
        if (audioRef.current) {
            audioRef.current.pause();
            audioRef.current = null;
        }
        setPlayingVoiceId(null);
    }, []);

    // Debounce the search box so typing doesn't fire a request per keystroke.
    useEffect(() => {
        const timer = setTimeout(() => setDebouncedSearch(searchInput), SEARCH_DEBOUNCE_MS);
        return () => clearTimeout(timer);
    }, [searchInput]);

    // Resolve the currently-selected voice (for the trigger label) without
    // pulling the catalog: a targeted lookup by voice ID.
    useEffect(() => {
        if (!value) {
            setSelectedVoiceInfo(null);
            return;
        }
        let active = true;
        (async () => {
            const response = await getVoicesApiV1UserConfigurationsVoicesProviderGet({
                path: { provider: provider as never },
                query: { q: value },
            });
            if (!active) return;
            const found = response.data?.voices?.find((voice) => voice.voice_id === value) ?? null;
            setSelectedVoiceInfo(found);
        })();
        return () => {
            active = false;
        };
    }, [value, provider]);

    // Fetch the filtered voice list (server-side) whenever the modal is open
    // and a filter changes. A request counter discards out-of-order responses.
    useEffect(() => {
        if (!isOpen || manualMode) return;
        const id = ++requestId.current;
        setIsLoading(true);
        setError(null);
        (async () => {
            const query: Record<string, string> = {};
            if (model) query.model = model;
            if (gender !== ALL_FILTER_VALUE) query.gender = gender;
            if (accent !== ALL_FILTER_VALUE) query.accent = accent;
            if (language !== ALL_FILTER_VALUE) query.language = language;
            const search = debouncedSearch.trim();
            if (search) query.q = search;

            const response = await getVoicesApiV1UserConfigurationsVoicesProviderGet({
                path: { provider: provider as never },
                query,
            });
            if (id !== requestId.current) return; // a newer request superseded this one

            if (response.error) {
                setError("Failed to load voices");
                setVoices([]);
            } else {
                setVoices(response.data?.voices ?? []);
                if (response.data?.facets) {
                    setFacets({
                        genders: response.data.facets.genders ?? [],
                        accents: response.data.facets.accents ?? [],
                        languages: response.data.facets.languages ?? [],
                    });
                }
            }
            setIsLoading(false);
        })();
    }, [isOpen, manualMode, provider, model, gender, accent, language, debouncedSearch]);

    // Stop any preview when the modal closes / unmounts.
    useEffect(() => {
        if (!isOpen) stopPreview();
        return () => stopPreview();
    }, [isOpen, stopPreview]);

    // Facets arrive sorted by raw code; present them sorted by display label so
    // the dropdowns read alphabetically (e.g. "American" near the top, not "us").
    const toSortedOptions = (codes: string[], selected: string, label: (code: string) => string) =>
        withSelected(codes, selected)
            .map((code) => ({ value: code, label: label(code) }))
            .sort((a, b) => a.label.localeCompare(b.label));

    const genderOptions = useMemo(
        () => toSortedOptions(facets.genders, gender, genderLabel),
        [facets.genders, gender],
    );
    const accentOptions = useMemo(
        () => toSortedOptions(facets.accents, accent, accentLabel),
        [facets.accents, accent],
    );
    const languageOptions = useMemo(
        () => toSortedOptions(facets.languages, language, languageLabel),
        [facets.languages, language],
    );

    const openModal = () => {
        setGender(DEFAULT_GENDER);
        setAccent(DEFAULT_ACCENT);
        setLanguage(DEFAULT_LANGUAGE);
        setSearchInput("");
        setDebouncedSearch("");
        setManualMode(false);
        setManualVoiceId(value);
        setPendingVoiceId(value);
        setIsOpen(true);
    };

    const playPreview = (voice: VoiceInfo) => {
        if (playingVoiceId === voice.voice_id) {
            stopPreview();
            return;
        }
        stopPreview();
        if (!voice.preview_url) return;
        const audio = new Audio(voice.preview_url);
        audioRef.current = audio;
        setPlayingVoiceId(voice.voice_id);
        const clear = () => {
            if (audioRef.current === audio) audioRef.current = null;
            setPlayingVoiceId((current) => (current === voice.voice_id ? null : current));
        };
        audio.onended = clear;
        audio.onerror = clear;
        audio.play().catch(clear);
    };

    const commitSelection = () => {
        if (manualMode) {
            const next = manualVoiceId.trim();
            if (next) onChange(next);
        } else if (pendingVoiceId) {
            onChange(pendingVoiceId);
            const chosen = voices.find((voice) => voice.voice_id === pendingVoiceId);
            if (chosen) setSelectedVoiceInfo(chosen);
        }
        setIsOpen(false);
    };

    const triggerLabel = selectedVoiceInfo?.name || value || "Select a voice";
    const triggerTraits = selectedVoiceInfo ? voiceTraits(selectedVoiceInfo) : "";

    return (
        <div className={cn("space-y-2", className)}>
            <Button
                type="button"
                variant="outline"
                className={cn("w-full justify-between", !value && "text-muted-foreground")}
                onClick={openModal}
            >
                <span className="flex min-w-0 items-center gap-2">
                    <span className="truncate font-medium">{triggerLabel}</span>
                    {triggerTraits && (
                        <span className="truncate text-xs text-muted-foreground">{triggerTraits}</span>
                    )}
                </span>
                <ChevronDown className="ml-2 h-4 w-4 shrink-0 opacity-50" />
            </Button>

            <Dialog open={isOpen} onOpenChange={setIsOpen}>
                <DialogContent className="flex max-h-[85vh] flex-col gap-0 overflow-hidden p-0 sm:max-w-3xl">
                    <DialogHeader className="border-b px-6 py-4">
                        <DialogTitle>Select Voice</DialogTitle>
                    </DialogHeader>

                    {/* Filter row: Gender · Accent · Language · Search */}
                    <div className="flex flex-wrap items-center gap-2 border-b px-6 py-3">
                        <Select value={gender} onValueChange={setGender} disabled={manualMode}>
                            <SelectTrigger className="h-9 w-[130px]">
                                <SelectValue placeholder="Gender" />
                            </SelectTrigger>
                            <SelectContent>
                                <SelectItem value={ALL_FILTER_VALUE}>All genders</SelectItem>
                                {genderOptions.map((option) => (
                                    <SelectItem key={option.value} value={option.value}>
                                        {option.label}
                                    </SelectItem>
                                ))}
                            </SelectContent>
                        </Select>

                        <Select value={accent} onValueChange={setAccent} disabled={manualMode}>
                            <SelectTrigger className="h-9 w-[140px]">
                                <SelectValue placeholder="Accent" />
                            </SelectTrigger>
                            <SelectContent>
                                <SelectItem value={ALL_FILTER_VALUE}>All accents</SelectItem>
                                {accentOptions.map((option) => (
                                    <SelectItem key={option.value} value={option.value}>
                                        {option.label}
                                    </SelectItem>
                                ))}
                            </SelectContent>
                        </Select>

                        <Select value={language} onValueChange={setLanguage} disabled={manualMode}>
                            <SelectTrigger className="h-9 w-[150px]">
                                <SelectValue placeholder="Language" />
                            </SelectTrigger>
                            <SelectContent>
                                <SelectItem value={ALL_FILTER_VALUE}>All languages</SelectItem>
                                {languageOptions.map((option) => (
                                    <SelectItem key={option.value} value={option.value}>
                                        {option.label}
                                    </SelectItem>
                                ))}
                            </SelectContent>
                        </Select>

                        <Input
                            placeholder="Search voices..."
                            value={searchInput}
                            onChange={(event) => setSearchInput(event.target.value)}
                            className="h-9 min-w-[160px] flex-1"
                            disabled={manualMode}
                        />
                    </div>

                    {/* Body */}
                    <div className="min-h-[260px] flex-1 overflow-auto px-6 py-4">
                        {manualMode ? (
                            <div className="space-y-2">
                                <Label htmlFor="manual-voice-id">Custom voice ID</Label>
                                <Input
                                    id="manual-voice-id"
                                    placeholder="Enter voice ID"
                                    value={manualVoiceId}
                                    onChange={(event) => setManualVoiceId(event.target.value)}
                                    autoFocus
                                />
                                <p className="text-xs text-muted-foreground">
                                    Use a voice ID that isn&apos;t in the catalog above.
                                </p>
                            </div>
                        ) : error ? (
                            <p className="py-10 text-center text-sm text-destructive">{error}</p>
                        ) : isLoading ? (
                            <div className="flex items-center justify-center py-10">
                                <Loader2 className="h-6 w-6 animate-spin text-muted-foreground" />
                            </div>
                        ) : voices.length === 0 ? (
                            <p className="py-10 text-center text-sm text-muted-foreground">
                                No voices match these filters
                            </p>
                        ) : (
                            <div className="grid gap-2 sm:grid-cols-2">
                                {voices.map((voice) => {
                                    const isSelected = pendingVoiceId === voice.voice_id;
                                    const isPlaying = playingVoiceId === voice.voice_id;
                                    return (
                                        <button
                                            type="button"
                                            key={voice.voice_id}
                                            onClick={() => setPendingVoiceId(voice.voice_id)}
                                            className={cn(
                                                "flex items-center gap-3 rounded-lg border p-3 text-left transition-colors hover:bg-accent",
                                                isSelected ? "border-primary ring-1 ring-primary" : "border-border",
                                            )}
                                        >
                                            <span
                                                role="button"
                                                tabIndex={voice.preview_url ? 0 : -1}
                                                aria-label={isPlaying ? "Stop preview" : "Play preview"}
                                                onClick={(event) => {
                                                    event.stopPropagation();
                                                    playPreview(voice);
                                                }}
                                                onKeyDown={(event) => {
                                                    if (event.key === "Enter" || event.key === " ") {
                                                        event.preventDefault();
                                                        event.stopPropagation();
                                                        playPreview(voice);
                                                    }
                                                }}
                                                className={cn(
                                                    "flex h-10 w-10 shrink-0 items-center justify-center rounded-full",
                                                    voice.preview_url
                                                        ? "bg-primary/10 text-primary hover:bg-primary/20"
                                                        : "bg-muted text-muted-foreground",
                                                )}
                                            >
                                                {isPlaying ? (
                                                    <Square className="h-4 w-4 fill-current" />
                                                ) : (
                                                    <Play className="h-4 w-4 fill-current" />
                                                )}
                                            </span>
                                            <span className="flex min-w-0 flex-1 flex-col">
                                                <span className="flex items-center gap-2">
                                                    <span className="truncate text-sm font-medium">{voice.name}</span>
                                                    {isSelected && <Check className="h-4 w-4 shrink-0 text-primary" />}
                                                </span>
                                                {voiceTraits(voice) && (
                                                    <span className="truncate text-xs text-muted-foreground">
                                                        {voiceTraits(voice)}
                                                    </span>
                                                )}
                                                <span className="truncate text-[11px] text-muted-foreground/70">
                                                    ID: {voice.voice_id}
                                                </span>
                                            </span>
                                        </button>
                                    );
                                })}
                            </div>
                        )}
                    </div>

                    {/* Footer */}
                    <div className="flex items-center justify-between gap-3 border-t px-6 py-3">
                        {allowManualInput ? (
                            <Button
                                type="button"
                                variant="ghost"
                                size="sm"
                                className="text-muted-foreground"
                                onClick={() => setManualMode((prev) => !prev)}
                            >
                                <Pencil className="mr-2 h-4 w-4" />
                                {manualMode ? "Browse catalog" : "Custom voice ID"}
                            </Button>
                        ) : (
                            <span className="text-xs text-muted-foreground">
                                {!manualMode && !isLoading && !error ? `${voices.length} voices` : ""}
                            </span>
                        )}
                        <div className="flex items-center gap-2">
                            <Button type="button" variant="outline" onClick={() => setIsOpen(false)}>
                                Cancel
                            </Button>
                            <Button
                                type="button"
                                onClick={commitSelection}
                                disabled={manualMode ? !manualVoiceId.trim() : !pendingVoiceId}
                            >
                                Use this voice
                            </Button>
                        </div>
                    </div>
                </DialogContent>
            </Dialog>
        </div>
    );
};