Add realtime audio device config (#12849)

## Summary
- add top-level realtime audio config for microphone and speaker
selection
- apply configured devices when starting realtime capture and playback
- keep missing-device behavior on the system default fallback path

## Validation
- just write-config-schema
- cargo test -p codex-core realtime_audio
- cargo test -p codex-tui
- just fix -p codex-core
- just fix -p codex-tui
- just fmt

---------

Co-authored-by: Codex <noreply@openai.com>
This commit is contained in:
Ahmed Ibrahim
2026-02-26 15:08:21 -08:00
committed by GitHub
parent fd719d3828
commit a0e86c69fe
7 changed files with 299 additions and 17 deletions

View File

@@ -426,6 +426,9 @@ pub struct Config {
/// Base URL for requests to ChatGPT (as opposed to the OpenAI API).
pub chatgpt_base_url: String,
/// Machine-local realtime audio device preferences used by realtime voice.
pub realtime_audio: RealtimeAudioConfig,
/// Experimental / do not use. Overrides only the realtime conversation
/// websocket transport base URL (the `Op::RealtimeConversation` `/ws`
/// connection) without changing normal provider HTTP requests.
@@ -1175,6 +1178,10 @@ pub struct ConfigToml {
/// Base URL for requests to ChatGPT (as opposed to the OpenAI API).
pub chatgpt_base_url: Option<String>,
/// Machine-local realtime audio device preferences used by realtime voice.
#[serde(default)]
pub audio: Option<RealtimeAudioToml>,
/// Experimental / do not use. Overrides only the realtime conversation
/// websocket transport base URL (the `Op::RealtimeConversation` `/ws`
/// connection) without changing normal provider HTTP requests.
@@ -1306,6 +1313,19 @@ impl ProjectConfig {
}
}
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct RealtimeAudioConfig {
pub microphone: Option<String>,
pub speaker: Option<String>,
}
#[derive(Serialize, Deserialize, Debug, Clone, Default, PartialEq, Eq, JsonSchema)]
#[schemars(deny_unknown_fields)]
pub struct RealtimeAudioToml {
pub microphone: Option<String>,
pub speaker: Option<String>,
}
#[derive(Serialize, Deserialize, Debug, Clone, Default, PartialEq, JsonSchema)]
#[schemars(deny_unknown_fields)]
pub struct ToolsToml {
@@ -2146,6 +2166,12 @@ impl Config {
.chatgpt_base_url
.or(cfg.chatgpt_base_url)
.unwrap_or("https://chatgpt.com/backend-api/".to_string()),
realtime_audio: cfg
.audio
.map_or_else(RealtimeAudioConfig::default, |audio| RealtimeAudioConfig {
microphone: audio.microphone,
speaker: audio.speaker,
}),
experimental_realtime_ws_base_url: cfg.experimental_realtime_ws_base_url,
experimental_realtime_ws_backend_prompt: cfg.experimental_realtime_ws_backend_prompt,
forced_chatgpt_workspace_id,
@@ -4766,6 +4792,7 @@ model_verbosity = "high"
model_verbosity: None,
personality: Some(Personality::Pragmatic),
chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(),
realtime_audio: RealtimeAudioConfig::default(),
experimental_realtime_ws_base_url: None,
experimental_realtime_ws_backend_prompt: None,
base_instructions: None,
@@ -4892,6 +4919,7 @@ model_verbosity = "high"
model_verbosity: None,
personality: Some(Personality::Pragmatic),
chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(),
realtime_audio: RealtimeAudioConfig::default(),
experimental_realtime_ws_base_url: None,
experimental_realtime_ws_backend_prompt: None,
base_instructions: None,
@@ -5016,6 +5044,7 @@ model_verbosity = "high"
model_verbosity: None,
personality: Some(Personality::Pragmatic),
chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(),
realtime_audio: RealtimeAudioConfig::default(),
experimental_realtime_ws_base_url: None,
experimental_realtime_ws_backend_prompt: None,
base_instructions: None,
@@ -5126,6 +5155,7 @@ model_verbosity = "high"
model_verbosity: Some(Verbosity::High),
personality: Some(Personality::Pragmatic),
chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(),
realtime_audio: RealtimeAudioConfig::default(),
experimental_realtime_ws_base_url: None,
experimental_realtime_ws_backend_prompt: None,
base_instructions: None,
@@ -5970,6 +6000,39 @@ experimental_realtime_ws_backend_prompt = "prompt from config"
);
Ok(())
}
#[test]
fn realtime_audio_loads_from_config_toml() -> std::io::Result<()> {
let cfg: ConfigToml = toml::from_str(
r#"
[audio]
microphone = "USB Mic"
speaker = "Desk Speakers"
"#,
)
.expect("TOML deserialization should succeed");
let realtime_audio = cfg
.audio
.as_ref()
.expect("realtime audio config should be present");
assert_eq!(realtime_audio.microphone.as_deref(), Some("USB Mic"));
assert_eq!(realtime_audio.speaker.as_deref(), Some("Desk Speakers"));
let codex_home = TempDir::new()?;
let config = Config::load_from_base_config_with_overrides(
cfg,
ConfigOverrides::default(),
codex_home.path().to_path_buf(),
)?;
assert_eq!(config.realtime_audio.microphone.as_deref(), Some("USB Mic"));
assert_eq!(
config.realtime_audio.speaker.as_deref(),
Some("Desk Speakers")
);
Ok(())
}
}
#[cfg(test)]