)} {/* Metrics */} {metrics && (

VAD {metrics.vad_ms}ms / STT {metrics.stt_ms}ms / LLM {metrics.llm_ms}ms / TTS {metrics.tts_ms}ms / First audio{" "} {metrics.first_audio_ms}ms

)} {/* Transcript */} {transcript.length === 0 ? (

{isInCall ? "Start speaking..." : connected ? "Click Call to start via WebRTC" : "Connecting to agent..."}

) : (

{transcript.map((msg, i) => (

{msg.text || ( ... )}

{msg.timestamp && ( {formatTime(new Date(msg.timestamp))} )}

))}

)} {/* Controls */}

{!isInCall ? ( ) : ( <> )}

{/* Text input */} {/* Session info */}

Session: {sessionId.slice(0, 8)}... (WebRTC/SFU)

); }

// --- Main App ---

/**
 * Root component of the voice-agent demo.
 *
 * Drives a voice session through `useVoiceAgent` and renders the header,
 * connection status, transport toggle, toast / error / speaker-conflict /
 * kicked banners, status indicator, audio level meter, latency metrics,
 * transcript, call controls, text input and session info. When the
 * "webrtc" transport is selected, the SFU variant is rendered instead.
 */
function App() {
  const sessionId = useRef(getSessionId()).current;
  const [transport, setTransport] = useState<"websocket" | "webrtc">(
    "websocket"
  );

  // Declared before useVoiceAgent so the onReconnect callback below does not
  // reference setToast ahead of its declaration.
  const [toast, setToast] = useState(null);

  const {
    status,
    transcript,
    interimTranscript,
    metrics,
    audioLevel,
    isMuted,
    connected,
    error,
    startCall,
    endCall,
    toggleMute,
    sendText
  } = useVoiceAgent({
    agent: "my-voice-agent",
    name: sessionId,
    onReconnect: () => {
      setToast("Reconnected to agent.");
    }
  });

  const transcriptEndRef = useRef(null);
  const [textInput, setTextInput] = useState("");
  const [speakerConflict, setSpeakerConflict] = useState(false);
  const [kicked, setKicked] = useState(false);

  // Custom protocol messages (speaker_conflict, kicked, speaker_available)
  // cannot be observed directly here: useVoiceAgent abstracts the socket, and
  // VoiceClient's handleJSONMessage silently ignores unknown message types.
  // Instead, the server sends speaker conflicts as "error" type messages,
  // which VoiceClient surfaces through the `error` field watched below.

  // Auto-clear toasts
  useEffect(() => {
    if (!toast) {
      return undefined;
    }
    const timer = setTimeout(() => setToast(null), 4000);
    return () => clearTimeout(timer);
  }, [toast]);

  // Auto-scroll transcript
  useEffect(() => {
    transcriptEndRef.current?.scrollIntoView({ behavior: "smooth" });
  }, [transcript, interimTranscript]);

  // Detect speaker conflict from error messages
  useEffect(() => {
    if (!error) {
      return;
    }
    // "speaker" also matches the more specific "active speaker" wording, so a
    // single substring test is enough.
    if (error.includes("speaker")) {
      setSpeakerConflict(true);
    }
    // A takeover wins over a plain conflict: show the kicked banner only.
    if (error.includes("taken over")) {
      setKicked(true);
      setSpeakerConflict(false);
    }
  }, [error]);

  /**
   * Asks the server to make this session the active speaker.
   *
   * VoiceClient only exposes sendText ({ type: "text_message", text }) and
   * offers no way to send a raw { type: "kick_speaker" } message, so the
   * request is made over HTTP. If the request cannot be completed, or the
   * server answers with a non-2xx status, the page is reloaded as a fallback.
   */
  const handleKickSpeaker = useCallback(() => {
    setSpeakerConflict(false);
    setKicked(false);
    setToast("Attempting to take over as speaker...");

    fetch(`/agents/my-voice-agent/${sessionId}?action=kick`, {
      method: "POST"
    })
      .then((response) => {
        // fetch() only rejects on network failure; HTTP error statuses must
        // be turned into a rejection explicitly to reach the fallback below.
        if (!response.ok) {
          throw new Error(`Kick request failed with status ${response.status}`);
        }
      })
      .catch(() => {
        // If the RPC fails, just reload
        window.location.reload();
      });
  }, [sessionId]);

  const isInCall = status !== "idle";
  const statusDisplay = getStatusDisplay(status);
  const StatusIcon = statusDisplay.icon;

  // If WebRTC transport is selected, render the SFU app
  if (transport === "webrtc") {
    return (

{/* Header */}

Voice Agent

{/* Transport toggle */}

{/* Footer */}

); } return (

{/* Header */}

Voice Agent

{/* Connection status */} {connected ? ( ) : ( )} {connected ? "Connected" : "Connecting..."}

{/* Transport toggle */}

{/* Toast notification */} {toast && (

{toast}

)} {/* Error banner */} {error && !speakerConflict && !kicked && (

{error}

)} {/* Speaker conflict banner */} {speakerConflict && (

Another session is currently the active speaker.

)} {/* Kicked banner */} {kicked && (

Another session has taken over. You have been disconnected.

)} {/* Status indicator */}

{statusDisplay.text}

{/* Audio level meter */} {isInCall && status === "listening" && (

)} {/* Latency metrics */} {metrics && (

VAD {metrics.vad_ms}ms / STT {metrics.stt_ms}ms / LLM {metrics.llm_ms}ms / TTS {metrics.tts_ms}ms / First audio{" "} {metrics.first_audio_ms}ms

)} {/* Transcript */} {transcript.length === 0 ? (

{isInCall ? "Start speaking..." : connected ? "Click Call to start a conversation" : "Connecting to agent..."}

) : (

{transcript.map((msg, i) => (

{msg.text || ( ... )}

{msg.timestamp && ( {formatTime(new Date(msg.timestamp))} )}

))} {/* Interim transcript — live preview of what the user is saying */} {interimTranscript && (

{interimTranscript}

)}

)} {/* Controls */}

{!isInCall ? ( ) : ( <> )}

{/* Text input — type to the agent */} {/* Session info */}

Session: {sessionId.slice(0, 8)}...

{/* Footer */}

); } const root = createRoot(document.getElementById("root")!); root.render( );