Add realtime transcription mode for websocket sessions (#14556)

- add experimental_realtime_ws_mode (conversational/transcription) and plumb it into realtime conversation session config
- switch realtime websocket intent and session.update payload shape based on mode
- update config schema and realtime/config tests

---------

Co-authored-by: Codex <noreply@openai.com>
2026-04-28 18:32:04 +03:00 · 2026-03-12 23:50:30 -07:00
parent eaf81d3f6f
commit 2253a9d1d7
9 changed files with 482 additions and 63 deletions
--- a/codex-rs/core/config.schema.json
+++ b/codex-rs/core/config.schema.json
@@ -1342,6 +1342,13 @@
      },
      "type": "object"
    },
+    "RealtimeWsMode": {
+      "enum": [
+        "conversational",
+        "transcription"
+      ],
+      "type": "string"
+    },
    "ReasoningEffort": {
      "description": "See https://platform.openai.com/docs/guides/reasoning?api-mode=responses#get-started-with-reasoning",
      "enum": [
@@ -1816,6 +1823,14 @@
      "description": "Experimental / do not use. Overrides only the realtime conversation websocket transport base URL (the `Op::RealtimeConversation` `/v1/realtime` connection) without changing normal provider HTTP requests.",
      "type": "string"
    },
+    "experimental_realtime_ws_mode": {
+      "allOf": [
+        {
+          "$ref": "#/definitions/RealtimeWsMode"
+        }
+      ],
+      "description": "Experimental / do not use. Selects the realtime websocket intent mode. `conversational` is speech-to-speech while `transcription` is transcript-only."
+    },
    "experimental_realtime_ws_model": {
      "description": "Experimental / do not use. Selects the realtime websocket model/snapshot used for the `Op::RealtimeConversation` connection.",
      "type": "string"