realtime: disable output interruption from turn detection

This commit is contained in:
Ahmed Ibrahim
2026-03-04 17:14:23 -08:00
parent 159bda93c6
commit a5420779c4
3 changed files with 39 additions and 0 deletions

View File

@@ -9,6 +9,7 @@ use crate::endpoint::realtime_websocket::protocol::SessionAudioFormat;
use crate::endpoint::realtime_websocket::protocol::SessionAudioInput;
use crate::endpoint::realtime_websocket::protocol::SessionAudioOutput;
use crate::endpoint::realtime_websocket::protocol::SessionAudioOutputFormat;
use crate::endpoint::realtime_websocket::protocol::SessionTurnDetection;
use crate::endpoint::realtime_websocket::protocol::SessionTool;
use crate::endpoint::realtime_websocket::protocol::SessionToolParameters;
use crate::endpoint::realtime_websocket::protocol::SessionToolProperties;
@@ -343,6 +344,11 @@ impl RealtimeWebsocketWriter {
kind: "audio/pcm".to_string(),
rate: 24_000,
},
turn_detection: SessionTurnDetection {
kind: "semantic_vad".to_string(),
interrupt_response: false,
create_response: true,
},
},
output: SessionAudioOutput {
format: SessionAudioOutputFormat {
@@ -899,6 +905,18 @@ mod tests {
first_json["session"]["audio"]["input"]["format"]["rate"],
Value::from(24_000)
);
assert_eq!(
first_json["session"]["audio"]["input"]["turn_detection"]["type"],
Value::String("semantic_vad".to_string())
);
assert_eq!(
first_json["session"]["audio"]["input"]["turn_detection"]["interrupt_response"],
Value::Bool(false)
);
assert_eq!(
first_json["session"]["audio"]["input"]["turn_detection"]["create_response"],
Value::Bool(true)
);
assert_eq!(
first_json["session"]["audio"]["output"]["format"]["type"],
Value::String("audio/pcm".to_string())

View File

@@ -48,6 +48,7 @@ pub(super) struct SessionAudio {
/// Audio-input portion of the realtime session payload.
///
/// Derives `Serialize`; each field is emitted under its own name, so the
/// resulting object carries a `format` key and a `turn_detection` key.
#[derive(Debug, Clone, Serialize)]
pub(super) struct SessionAudioInput {
/// Format descriptor for the input audio.
pub(super) format: SessionAudioFormat,
/// Turn-detection settings sent alongside the input format.
pub(super) turn_detection: SessionTurnDetection,
}
#[derive(Debug, Clone, Serialize)]
@@ -57,6 +58,14 @@ pub(super) struct SessionAudioFormat {
pub(super) rate: u32,
}
/// Turn-detection settings, serialized as a JSON object with the keys
/// `type`, `interrupt_response`, and `create_response`.
#[derive(Debug, Clone, Serialize)]
pub(super) struct SessionTurnDetection {
/// Turn-detection mode. Emitted under the JSON key `type`; the field is
/// named `kind` because `type` is a reserved word in Rust.
#[serde(rename = "type")]
pub(super) kind: String,
/// NOTE(review): presumably controls whether detected user speech may
/// interrupt a response that is already being produced — confirm against
/// the realtime API reference.
pub(super) interrupt_response: bool,
/// NOTE(review): presumably controls whether a response is started
/// automatically once a turn is detected — confirm against the realtime
/// API reference.
pub(super) create_response: bool,
}
#[derive(Debug, Clone, Serialize)]
pub(super) struct SessionAudioOutput {
pub(super) format: SessionAudioOutputFormat,

View File

@@ -95,6 +95,18 @@ async fn realtime_ws_e2e_session_create_and_event_flow() {
first_json["session"]["audio"]["input"]["format"]["rate"],
Value::from(24_000)
);
assert_eq!(
first_json["session"]["audio"]["input"]["turn_detection"]["type"],
Value::String("semantic_vad".to_string())
);
assert_eq!(
first_json["session"]["audio"]["input"]["turn_detection"]["interrupt_response"],
Value::Bool(false)
);
assert_eq!(
first_json["session"]["audio"]["input"]["turn_detection"]["create_response"],
Value::Bool(true)
);
assert_eq!(
first_json["session"]["tool_choice"],
Value::String("auto".to_string())