Handle realtime conversation end in the TUI (#14903)

- close live realtime sessions on errors, ctrl-c, and active meter
removal
- centralize TUI realtime cleanup and avoid duplicate follow-up close
info

---------

Co-authored-by: Codex <noreply@openai.com>
Co-authored-by: Ahmed Ibrahim <219906144+aibrahim-oai@users.noreply.github.com>
This commit is contained in:
Ahmed Ibrahim
2026-03-17 21:04:58 -07:00
committed by GitHub
parent 770616414a
commit 3ce879c646
10 changed files with 326 additions and 67 deletions

View File

@@ -4,10 +4,13 @@ use codex_protocol::protocol::RealtimeAudioFrame;
use codex_protocol::protocol::RealtimeConversationClosedEvent;
use codex_protocol::protocol::RealtimeConversationRealtimeEvent;
use codex_protocol::protocol::RealtimeConversationStartedEvent;
#[cfg(not(target_os = "linux"))]
use codex_protocol::protocol::RealtimeConversationVersion;
use codex_protocol::protocol::RealtimeEvent;
#[cfg(not(target_os = "linux"))]
use std::sync::atomic::AtomicUsize;
#[cfg(not(target_os = "linux"))]
use std::time::Duration;
const REALTIME_CONVERSATION_PROMPT: &str = "You are in a realtime voice conversation in the Codex TUI. Respond conversationally and concisely.";
@@ -22,12 +25,14 @@ pub(super) enum RealtimeConversationPhase {
#[derive(Default)]
pub(super) struct RealtimeConversationUiState {
phase: RealtimeConversationPhase,
pub(super) phase: RealtimeConversationPhase,
#[cfg(not(target_os = "linux"))]
audio_behavior: RealtimeAudioBehavior,
requested_close: bool,
session_id: Option<String>,
warned_audio_only_submission: bool,
meter_placeholder_id: Option<String>,
#[cfg(not(target_os = "linux"))]
pub(super) meter_placeholder_id: Option<String>,
#[cfg(not(target_os = "linux"))]
capture_stop_flag: Option<Arc<AtomicBool>>,
#[cfg(not(target_os = "linux"))]
@@ -40,6 +45,7 @@ pub(super) struct RealtimeConversationUiState {
playback_queued_samples: Arc<AtomicUsize>,
}
#[cfg(not(target_os = "linux"))]
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
enum RealtimeAudioBehavior {
#[default]
@@ -47,6 +53,7 @@ enum RealtimeAudioBehavior {
PlaybackAware,
}
#[cfg(not(target_os = "linux"))]
impl RealtimeAudioBehavior {
fn from_version(version: RealtimeConversationVersion) -> Self {
match version {
@@ -55,7 +62,6 @@ impl RealtimeAudioBehavior {
}
}
#[cfg(not(target_os = "linux"))]
fn input_behavior(
self,
playback_queued_samples: Arc<AtomicUsize>,
@@ -79,6 +85,7 @@ impl RealtimeConversationUiState {
)
}
#[cfg(not(target_os = "linux"))]
pub(super) fn is_active(&self) -> bool {
matches!(self.phase, RealtimeConversationPhase::Active)
}
@@ -233,7 +240,10 @@ impl ChatWidget {
self.realtime_conversation.phase = RealtimeConversationPhase::Starting;
self.realtime_conversation.requested_close = false;
self.realtime_conversation.session_id = None;
self.realtime_conversation.audio_behavior = RealtimeAudioBehavior::Legacy;
#[cfg(not(target_os = "linux"))]
{
self.realtime_conversation.audio_behavior = RealtimeAudioBehavior::Legacy;
}
self.realtime_conversation.warned_audio_only_submission = false;
self.set_footer_hint_override(Some(vec![(
"/realtime".to_string(),
@@ -273,22 +283,38 @@ impl ChatWidget {
self.realtime_conversation.phase = RealtimeConversationPhase::Inactive;
self.realtime_conversation.requested_close = false;
self.realtime_conversation.session_id = None;
self.realtime_conversation.audio_behavior = RealtimeAudioBehavior::Legacy;
#[cfg(not(target_os = "linux"))]
{
self.realtime_conversation.audio_behavior = RealtimeAudioBehavior::Legacy;
}
self.realtime_conversation.warned_audio_only_submission = false;
}
fn fail_realtime_conversation(&mut self, message: String) {
self.add_error_message(message);
if self.realtime_conversation.is_live() {
self.request_realtime_conversation_close(/*info_message*/ None);
} else {
self.reset_realtime_conversation_state();
self.request_redraw();
}
}
pub(super) fn on_realtime_conversation_started(
&mut self,
ev: RealtimeConversationStartedEvent,
) {
if !self.realtime_conversation_enabled() {
self.submit_op(Op::RealtimeConversationClose);
self.reset_realtime_conversation_state();
self.request_realtime_conversation_close(/*info_message*/ None);
return;
}
self.realtime_conversation.phase = RealtimeConversationPhase::Active;
self.realtime_conversation.session_id = ev.session_id;
self.realtime_conversation.audio_behavior = RealtimeAudioBehavior::from_version(ev.version);
#[cfg(not(target_os = "linux"))]
{
self.realtime_conversation.audio_behavior =
RealtimeAudioBehavior::from_version(ev.version);
}
self.realtime_conversation.warned_audio_only_submission = false;
self.set_footer_hint_override(Some(vec![(
"/realtime".to_string(),
@@ -308,14 +334,16 @@ impl ChatWidget {
}
RealtimeEvent::InputAudioSpeechStarted(_) | RealtimeEvent::ResponseCancelled(_) => {
#[cfg(not(target_os = "linux"))]
if matches!(
self.realtime_conversation.audio_behavior,
RealtimeAudioBehavior::PlaybackAware
) && let Some(player) = &self.realtime_conversation.audio_player
{
// Once the server detects user speech or the current response is cancelled,
// any buffered assistant audio is stale and should stop gating mic input.
player.clear();
if matches!(
self.realtime_conversation.audio_behavior,
RealtimeAudioBehavior::PlaybackAware
) && let Some(player) = &self.realtime_conversation.audio_player
{
// Once the server detects user speech or the current response is cancelled,
// any buffered assistant audio is stale and should stop gating mic input.
player.clear();
}
}
}
RealtimeEvent::InputTranscriptDelta(_) => {}
@@ -325,8 +353,7 @@ impl ChatWidget {
RealtimeEvent::ConversationItemDone { .. } => {}
RealtimeEvent::HandoffRequested(_) => {}
RealtimeEvent::Error(message) => {
self.add_error_message(format!("Realtime voice error: {message}"));
self.reset_realtime_conversation_state();
self.fail_realtime_conversation(format!("Realtime voice error: {message}"));
}
}
}
@@ -335,7 +362,10 @@ impl ChatWidget {
let requested = self.realtime_conversation.requested_close;
let reason = ev.reason;
self.reset_realtime_conversation_state();
if !requested && let Some(reason) = reason {
if !requested
&& let Some(reason) = reason
&& reason != "error"
{
self.add_info_message(
format!("Realtime voice mode closed: {reason}"),
/*hint*/ None,
@@ -387,9 +417,11 @@ impl ChatWidget {
) {
Ok(capture) => capture,
Err(err) => {
self.remove_transcription_placeholder(&placeholder_id);
self.realtime_conversation.meter_placeholder_id = None;
self.add_error_message(format!("Failed to start microphone capture: {err}"));
self.remove_transcription_placeholder(&placeholder_id);
self.fail_realtime_conversation(format!(
"Failed to start microphone capture: {err}"
));
return;
}
};
@@ -431,7 +463,7 @@ impl ChatWidget {
#[cfg(target_os = "linux")]
fn start_realtime_local_audio(&mut self) {}
#[cfg(all(not(target_os = "linux"), feature = "voice-input"))]
#[cfg(not(target_os = "linux"))]
pub(crate) fn restart_realtime_audio_device(&mut self, kind: RealtimeAudioDeviceKind) {
if !self.realtime_conversation.is_active() {
return;
@@ -452,7 +484,9 @@ impl ChatWidget {
self.realtime_conversation.audio_player = Some(player);
}
Err(err) => {
self.add_error_message(format!("Failed to start speaker output: {err}"));
self.fail_realtime_conversation(format!(
"Failed to start speaker output: {err}"
));
}
}
}
@@ -460,7 +494,7 @@ impl ChatWidget {
self.request_redraw();
}
#[cfg(any(target_os = "linux", not(feature = "voice-input")))]
#[cfg(target_os = "linux")]
pub(crate) fn restart_realtime_audio_device(&mut self, kind: RealtimeAudioDeviceKind) {
let _ = kind;
}
@@ -472,9 +506,7 @@ impl ChatWidget {
}
#[cfg(target_os = "linux")]
fn stop_realtime_local_audio(&mut self) {
self.realtime_conversation.meter_placeholder_id = None;
}
fn stop_realtime_local_audio(&mut self) {}
#[cfg(not(target_os = "linux"))]
fn stop_realtime_microphone(&mut self) {