Add realtime transcription mode for websocket sessions (#14556)

- add `experimental_realtime_ws_mode` (`conversational` or `transcription`) and
plumb it into the realtime conversation session config
- switch the realtime websocket intent and the `session.update` payload shape
based on the selected mode
- update config schema and realtime/config tests

---------

Co-authored-by: Codex <noreply@openai.com>
This commit is contained in:
Ahmed Ibrahim
2026-03-12 23:50:30 -07:00
committed by GitHub
parent eaf81d3f6f
commit 2253a9d1d7
9 changed files with 482 additions and 63 deletions

View File

@@ -1342,6 +1342,13 @@
},
"type": "object"
},
"RealtimeWsMode": {
"enum": [
"conversational",
"transcription"
],
"type": "string"
},
"ReasoningEffort": {
"description": "See https://platform.openai.com/docs/guides/reasoning?api-mode=responses#get-started-with-reasoning",
"enum": [
@@ -1816,6 +1823,14 @@
"description": "Experimental / do not use. Overrides only the realtime conversation websocket transport base URL (the `Op::RealtimeConversation` `/v1/realtime` connection) without changing normal provider HTTP requests.",
"type": "string"
},
"experimental_realtime_ws_mode": {
"allOf": [
{
"$ref": "#/definitions/RealtimeWsMode"
}
],
"description": "Experimental / do not use. Selects the realtime websocket intent mode. `conversational` is speech-to-speech while `transcription` is transcript-only."
},
"experimental_realtime_ws_model": {
"description": "Experimental / do not use. Selects the realtime websocket model/snapshot used for the `Op::RealtimeConversation` connection.",
"type": "string"