Use parser-specific realtime voice enum (#14636)

Model realtime session output voices with an enum and map by parser so
v1 uses fathom and v2 uses alloy.

Co-authored-by: Codex <noreply@openai.com>
This commit is contained in:
Ahmed Ibrahim
2026-03-13 16:17:13 -07:00
committed by GitHub
parent e9050e3e64
commit 7fa5201365
2 changed files with 19 additions and 9 deletions

View File

@@ -14,6 +14,7 @@ use crate::endpoint::realtime_websocket::protocol::SessionAudio;
use crate::endpoint::realtime_websocket::protocol::SessionAudioFormat;
use crate::endpoint::realtime_websocket::protocol::SessionAudioInput;
use crate::endpoint::realtime_websocket::protocol::SessionAudioOutput;
use crate::endpoint::realtime_websocket::protocol::SessionAudioVoice;
use crate::endpoint::realtime_websocket::protocol::SessionFunctionTool;
use crate::endpoint::realtime_websocket::protocol::SessionUpdateSession;
use crate::endpoint::realtime_websocket::protocol::parse_realtime_event;
@@ -47,7 +48,6 @@ use tungstenite::protocol::WebSocketConfig;
use url::Url;
const REALTIME_AUDIO_SAMPLE_RATE: u32 = 24_000;
const REALTIME_AUDIO_VOICE: &str = "fathom";
const REALTIME_V1_SESSION_TYPE: &str = "quicksilver";
const REALTIME_V2_SESSION_TYPE: &str = "realtime";
const REALTIME_V2_CODEX_TOOL_NAME: &str = "codex";
@@ -353,13 +353,11 @@ impl RealtimeWebsocketWriter {
RealtimeEventParser::V1 => REALTIME_V1_SESSION_TYPE.to_string(),
RealtimeEventParser::RealtimeV2 => REALTIME_V2_SESSION_TYPE.to_string(),
};
(
kind,
Some(instructions),
Some(SessionAudioOutput {
voice: REALTIME_AUDIO_VOICE.to_string(),
}),
)
let voice = match self.event_parser {
RealtimeEventParser::V1 => SessionAudioVoice::Fathom,
RealtimeEventParser::RealtimeV2 => SessionAudioVoice::Alloy,
};
(kind, Some(instructions), Some(SessionAudioOutput { voice }))
}
RealtimeSessionMode::Transcription => ("transcription".to_string(), None, None),
};
@@ -1388,6 +1386,10 @@ mod tests {
first_json["session"]["type"],
Value::String("realtime".to_string())
);
assert_eq!(
first_json["session"]["audio"]["output"]["voice"],
Value::String("alloy".to_string())
);
assert_eq!(
first_json["session"]["tools"][0]["type"],
Value::String("function".to_string())

View File

@@ -77,7 +77,15 @@ pub(super) struct SessionAudioFormat {
#[derive(Debug, Clone, Serialize)]
pub(super) struct SessionAudioOutput {
pub(super) voice: String,
pub(super) voice: SessionAudioVoice,
}
#[derive(Debug, Clone, Copy, Serialize)]
pub(super) enum SessionAudioVoice {
#[serde(rename = "fathom")]
Fathom,
#[serde(rename = "alloy")]
Alloy,
}
#[derive(Debug, Clone, Serialize)]