Add realtime audio device config (#12849)

## Summary

- add top-level realtime audio config for microphone and speaker selection
- apply configured devices when starting realtime capture and playback
- keep missing-device behavior on the system default fallback path

## Validation

- just write-config-schema
- cargo test -p codex-core realtime_audio
- cargo test -p codex-tui
- just fix -p codex-core
- just fix -p codex-tui
- just fmt

---------

Co-authored-by: Codex <noreply@openai.com>
2026-04-30 11:21:34 +03:00 · 2026-02-26 15:08:21 -08:00
parent fd719d3828
commit a0e86c69fe
7 changed files with 299 additions and 17 deletions
--- a/codex-rs/core/src/config/mod.rs
+++ b/codex-rs/core/src/config/mod.rs
@@ -426,6 +426,9 @@ pub struct Config {
    /// Base URL for requests to ChatGPT (as opposed to the OpenAI API).
    pub chatgpt_base_url: String,

+    /// Machine-local realtime audio device preferences used by realtime voice.
+    pub realtime_audio: RealtimeAudioConfig,
+
    /// Experimental / do not use. Overrides only the realtime conversation
    /// websocket transport base URL (the `Op::RealtimeConversation` `/ws`
    /// connection) without changing normal provider HTTP requests.
@@ -1175,6 +1178,10 @@ pub struct ConfigToml {
    /// Base URL for requests to ChatGPT (as opposed to the OpenAI API).
    pub chatgpt_base_url: Option<String>,

+    /// Machine-local realtime audio device preferences used by realtime voice.
+    #[serde(default)]
+    pub audio: Option<RealtimeAudioToml>,
+
    /// Experimental / do not use. Overrides only the realtime conversation
    /// websocket transport base URL (the `Op::RealtimeConversation` `/ws`
    /// connection) without changing normal provider HTTP requests.
@@ -1306,6 +1313,19 @@ impl ProjectConfig {
    }
 }

+#[derive(Debug, Clone, Default, PartialEq, Eq)] // `Default` yields both fields as `None`.
+pub struct RealtimeAudioConfig { // Resolved audio device preferences stored on `Config::realtime_audio`.
+    pub microphone: Option<String>, // Copied from `[audio].microphone`; `None` when unset (presumably the system default input is used -- confirm in the capture code).
+    pub speaker: Option<String>, // Copied from `[audio].speaker`; `None` when unset (presumably the system default output is used -- confirm in the playback code).
+}
+
+#[derive(Serialize, Deserialize, Debug, Clone, Default, PartialEq, Eq, JsonSchema)]
+#[schemars(deny_unknown_fields)] // Schema-level attribute, same as on the sibling `ToolsToml` type.
+pub struct RealtimeAudioToml { // Serialized form of the `[audio]` table; held in `ConfigToml::audio`.
+    pub microphone: Option<String>, // Optional `microphone = "..."` entry; forwarded verbatim into `RealtimeAudioConfig`.
+    pub speaker: Option<String>, // Optional `speaker = "..."` entry; forwarded verbatim into `RealtimeAudioConfig`.
+}
+
 #[derive(Serialize, Deserialize, Debug, Clone, Default, PartialEq, JsonSchema)]
 #[schemars(deny_unknown_fields)]
 pub struct ToolsToml {
@@ -2146,6 +2166,12 @@ impl Config {
                .chatgpt_base_url
                .or(cfg.chatgpt_base_url)
                .unwrap_or("https://chatgpt.com/backend-api/".to_string()),
+            realtime_audio: cfg
+                .audio
+                .map_or_else(RealtimeAudioConfig::default, |audio| RealtimeAudioConfig {
+                    microphone: audio.microphone,
+                    speaker: audio.speaker,
+                }),
            experimental_realtime_ws_base_url: cfg.experimental_realtime_ws_base_url,
            experimental_realtime_ws_backend_prompt: cfg.experimental_realtime_ws_backend_prompt,
            forced_chatgpt_workspace_id,
@@ -4766,6 +4792,7 @@ model_verbosity = "high"
                model_verbosity: None,
                personality: Some(Personality::Pragmatic),
                chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(),
+                realtime_audio: RealtimeAudioConfig::default(),
                experimental_realtime_ws_base_url: None,
                experimental_realtime_ws_backend_prompt: None,
                base_instructions: None,
@@ -4892,6 +4919,7 @@ model_verbosity = "high"
            model_verbosity: None,
            personality: Some(Personality::Pragmatic),
            chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(),
+            realtime_audio: RealtimeAudioConfig::default(),
            experimental_realtime_ws_base_url: None,
            experimental_realtime_ws_backend_prompt: None,
            base_instructions: None,
@@ -5016,6 +5044,7 @@ model_verbosity = "high"
            model_verbosity: None,
            personality: Some(Personality::Pragmatic),
            chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(),
+            realtime_audio: RealtimeAudioConfig::default(),
            experimental_realtime_ws_base_url: None,
            experimental_realtime_ws_backend_prompt: None,
            base_instructions: None,
@@ -5126,6 +5155,7 @@ model_verbosity = "high"
            model_verbosity: Some(Verbosity::High),
            personality: Some(Personality::Pragmatic),
            chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(),
+            realtime_audio: RealtimeAudioConfig::default(),
            experimental_realtime_ws_base_url: None,
            experimental_realtime_ws_backend_prompt: None,
            base_instructions: None,
@@ -5970,6 +6000,39 @@ experimental_realtime_ws_backend_prompt = "prompt from config"
        );
        Ok(())
    }
+
+    #[test] // Checks that `[audio]` values parse into `ConfigToml` and then reach `Config::realtime_audio`.
+    fn realtime_audio_loads_from_config_toml() -> std::io::Result<()> {
+        let cfg: ConfigToml = toml::from_str( // Step 1: deserialize the raw TOML text.
+            r#"
+[audio]
+microphone = "USB Mic"
+speaker = "Desk Speakers"
+"#,
+        )
+        .expect("TOML deserialization should succeed");
+
+        let realtime_audio = cfg // Inspect the parsed layer first, before building the full `Config`.
+            .audio
+            .as_ref()
+            .expect("realtime audio config should be present");
+        assert_eq!(realtime_audio.microphone.as_deref(), Some("USB Mic"));
+        assert_eq!(realtime_audio.speaker.as_deref(), Some("Desk Speakers"));
+
+        let codex_home = TempDir::new()?; // Temporary directory passed to the loader as the codex home path.
+        let config = Config::load_from_base_config_with_overrides( // Step 2: build the effective `Config` from the parsed TOML.
+            cfg,
+            ConfigOverrides::default(), // No overrides, so the values can only come from the TOML above.
+            codex_home.path().to_path_buf(),
+        )?;
+
+        assert_eq!(config.realtime_audio.microphone.as_deref(), Some("USB Mic")); // Device names must carry over unchanged.
+        assert_eq!(
+            config.realtime_audio.speaker.as_deref(),
+            Some("Desk Speakers")
+        );
+        Ok(())
+    }
 }

 #[cfg(test)]