import { useEffect, useRef, useState, useCallback, useMemo } from "react";
import {
  VoiceClient,
  type VoiceClientOptions,
  type VoiceStatus,
  type TranscriptMessage,
  type VoicePipelineMetrics
} from "./voice-client";

// Re-export types so consumers can import everything from agents/voice-react
export type {
  VoiceStatus,
  VoiceRole,
  VoiceAudioFormat,
  VoiceAudioInput,
  VoiceTransport,
  TranscriptMessage,
  VoicePipelineMetrics,
  VoiceClientOptions,
  VoiceClientEvent,
  VoiceClientEventMap
} from "./voice-client";
export { WebSocketVoiceTransport } from "./voice-client";

/** Options accepted by useVoiceAgent. */
export interface UseVoiceAgentOptions extends VoiceClientOptions {
  /**
   * Called when the hook reconnects due to option changes (e.g., agent name
   * or instance name changed). Use this to show a toast or notification.
   */
  onReconnect?: () => void;
}

/** Shape of the object returned by useVoiceAgent. */
export interface UseVoiceAgentReturn {
  status: VoiceStatus;
  transcript: TranscriptMessage[];
  /**
   * The current interim (partial) transcript from streaming STT.
   * Updates in real time as the user speaks. null when not available.
   */
  interimTranscript: string | null;
  metrics: VoicePipelineMetrics | null;
  audioLevel: number;
  isMuted: boolean;
  connected: boolean;
  error: string | null;
  /** Returns a promise that resolves with no value. */
  startCall: () => Promise<void>;
  endCall: () => void;
  toggleMute: () => void;
  sendText: (text: string) => void;
  /** Send arbitrary JSON to the agent (app-level messages). */
  sendJSON: (data: Record<string, unknown>) => void;
  /** The last non-voice-protocol message received from the server. */
  lastCustomMessage: unknown;
}

// ---------------------------------------------------------------------------
// useVoiceInput — lightweight hook for voice-to-text dictation
// ---------------------------------------------------------------------------

/** Options accepted by useVoiceInput. */
export interface UseVoiceInputOptions {
  /** Agent name (matches the server-side Durable Object class). */
  agent: string;
  /** Instance name for the agent. @default "default" */
  name?: string;
  /** Host to connect to. @default window.location.host */
  host?: string;
  /** RMS threshold below which audio is considered silence. @default 0.04 */
  silenceThreshold?: number;
  /** How long silence must last before sending end_of_speech (ms). @default 500 */
  silenceDurationMs?: number;
}

/** Shape of the object returned by useVoiceInput. */
export interface UseVoiceInputReturn {
  /** Accumulated final transcript text from all utterances. */
  transcript: string;
  /**
   * Current interim (partial) transcript from streaming STT.
   * Updates in real time as the user speaks. null when not available.
   */
  interimTranscript: string | null;
  /** Whether the mic is actively listening. */
  isListening: boolean;
  /** Current audio level (0–1) for visual feedback (e.g. waveform). */
  audioLevel: number;
  /** Whether the mic is muted. */
  isMuted: boolean;
  /** Any error message. */
  error: string | null;
  /** Start listening — requests mic permission and begins streaming audio. */
  start: () => Promise<void>;
  /** Stop listening — releases the mic. */
  stop: () => void;
  /** Toggle mute (mic stays open but audio is not sent). */
  toggleMute: () => void;
  /** Clear the accumulated transcript. */
  clear: () => void;
}

/**
 * React hook for voice-to-text input. Captures microphone audio, streams it
 * to a server-side VoiceAgent for STT, and returns the transcript as a string.
 *
 * Unlike `useVoiceAgent`, this hook is optimised for dictation — it accumulates
 * user transcripts into a single string and ignores assistant responses / TTS.
 *
 * @example
 * ```tsx
 * const { transcript, interimTranscript, isListening, start, stop } = useVoiceInput({
 *   agent: "voice-input-agent"
 * });
 *
 *