Move default realtime prompt into core (#17165)

- Adds a core-owned realtime backend prompt template and preparation
path.
- Makes omitted realtime start prompts use the core default, while null
or empty prompts intentionally send empty instructions.
- Adds integration coverage for the core realtime path and the
app-server v2 path.

---------

Co-authored-by: Codex <noreply@openai.com>
This commit is contained in:
Ahmed Ibrahim
2026-04-08 19:34:40 -07:00
committed by GitHub
parent 36586eafed
commit 4c2a1ae31b
17 changed files with 491 additions and 59 deletions

View File

@@ -29,6 +29,7 @@ codex_rust_crate(
},
integration_compile_data_extra = [
"//codex-rs/apply-patch:apply_patch_tool_instructions.md",
"templates/realtime/backend_prompt.md",
],
integration_test_timeout = "long",
test_data_extra = [

View File

@@ -116,6 +116,7 @@ tracing = { workspace = true, features = ["log"] }
url = { workspace = true }
uuid = { workspace = true, features = ["serde", "v4", "v5"] }
which = { workspace = true }
whoami = { workspace = true }
zip = { workspace = true }
[target.'cfg(target_os = "macos")'.dependencies]

View File

@@ -13,6 +13,7 @@ mod client_common;
pub(crate) mod codex;
mod realtime_context;
mod realtime_conversation;
mod realtime_prompt;
pub use codex::SteerInputError;
mod codex_thread;
mod compact_remote;

View File

@@ -1,6 +1,7 @@
use crate::client::ModelClient;
use crate::codex::Session;
use crate::realtime_context::build_realtime_startup_context;
use crate::realtime_prompt::prepare_realtime_backend_prompt;
use async_channel::Receiver;
use async_channel::Sender;
use async_channel::TrySendError;
@@ -546,14 +547,14 @@ async fn prepare_realtime_start(
pub(crate) async fn build_realtime_session_config(
sess: &Arc<Session>,
prompt: String,
prompt: Option<Option<String>>,
session_id: Option<String>,
) -> CodexResult<RealtimeSessionConfig> {
let config = sess.get_config().await;
let prompt = config
.experimental_realtime_ws_backend_prompt
.clone()
.unwrap_or(prompt);
let prompt = prepare_realtime_backend_prompt(
prompt,
config.experimental_realtime_ws_backend_prompt.clone(),
);
let startup_context = match config.experimental_realtime_ws_startup_context.clone() {
Some(startup_context) => startup_context,
None => {
@@ -562,10 +563,11 @@ pub(crate) async fn build_realtime_session_config(
.unwrap_or_default()
}
};
let prompt = if startup_context.is_empty() {
prompt
} else {
format!("{prompt}\n\n{startup_context}")
let prompt = match (prompt.is_empty(), startup_context.is_empty()) {
(true, true) => String::new(),
(true, false) => startup_context,
(false, true) => prompt,
(false, false) => format!("{prompt}\n\n{startup_context}"),
};
let model = config.experimental_realtime_ws_model.clone();
let event_parser = match config.realtime.version {

View File

@@ -0,0 +1,81 @@
/// Default instructions template bundled with the crate; rendered when a
/// realtime start request omits the prompt entirely.
const BACKEND_PROMPT: &str = include_str!("../templates/realtime/backend_prompt.md");
/// Fallback greeting name used when no OS-level user name can be determined.
const DEFAULT_USER_FIRST_NAME: &str = "there";
/// Token in the template that gets replaced with the user's first name.
const USER_FIRST_NAME_PLACEHOLDER: &str = "{{ user_first_name }}";
pub(crate) fn prepare_realtime_backend_prompt(
prompt: Option<Option<String>>,
config_prompt: Option<String>,
) -> String {
if let Some(config_prompt) = config_prompt
&& !config_prompt.trim().is_empty()
{
return config_prompt;
}
match prompt {
Some(Some(prompt)) => return prompt,
Some(None) => return String::new(),
None => {}
}
BACKEND_PROMPT
.trim_end()
.replace(USER_FIRST_NAME_PLACEHOLDER, &current_user_first_name())
}
/// Best-effort lookup of the user's first name for prompt templating.
///
/// Tries the full real name first, then the login name, taking the first
/// whitespace-separated token of the first candidate that has one. Falls
/// back to a generic greeting when neither source yields a usable name.
fn current_user_first_name() -> String {
    for candidate in [whoami::realname(), whoami::username()] {
        if let Some(first) = candidate.split_whitespace().next() {
            return first.to_string();
        }
    }
    DEFAULT_USER_FIRST_NAME.to_string()
}
// Unit coverage for prompt-resolution precedence: config override beats the
// request prompt, which beats the bundled default template.
#[cfg(test)]
mod tests {
    use super::prepare_realtime_backend_prompt;

    // A non-blank config override wins even when the request supplies a prompt.
    #[test]
    fn prepare_realtime_backend_prompt_prefers_config_override() {
        assert_eq!(
            prepare_realtime_backend_prompt(
                Some(Some("prompt from request".to_string())),
                Some("prompt from config".to_string()),
            ),
            "prompt from config"
        );
    }

    // Without a config override, an explicit request prompt is used verbatim.
    #[test]
    fn prepare_realtime_backend_prompt_uses_request_prompt() {
        assert_eq!(
            prepare_realtime_backend_prompt(
                Some(Some("prompt from request".to_string())),
                /*config_prompt*/ None,
            ),
            "prompt from request"
        );
    }

    // Both an explicit empty string and an explicit null prompt mean
    // "intentionally send empty instructions" — neither triggers the default.
    #[test]
    fn prepare_realtime_backend_prompt_preserves_empty_request_prompt() {
        assert_eq!(
            prepare_realtime_backend_prompt(Some(Some(String::new())), /*config_prompt*/ None),
            ""
        );
        assert_eq!(
            prepare_realtime_backend_prompt(Some(None), /*config_prompt*/ None),
            ""
        );
    }

    // An omitted prompt renders the bundled template with the placeholder
    // fully substituted (exact name depends on the environment, so only the
    // template's fixed text is asserted).
    #[test]
    fn prepare_realtime_backend_prompt_renders_default() {
        let prompt =
            prepare_realtime_backend_prompt(/*prompt*/ None, /*config_prompt*/ None);
        assert!(prompt.starts_with("You are **Codex**"));
        assert!(prompt.contains("The user's name is "));
        assert!(!prompt.contains("{{ user_first_name }}"));
    }
}

View File

@@ -0,0 +1,48 @@
You are **Codex**, an **OpenAI Coding Agent**: a real-time, voice-friendly coding assistant that helps the user while they work in the **current repository/project**.
The user's name is {{ user_first_name }}. Use {{ user_first_name }}'s name occasionally (not in every reply), mainly for emphasis, confirmations, or polite transitions.
## Core role
* Help {{ user_first_name }} complete coding tasks end-to-end: understand intent, inspect the repo when needed, propose concrete changes, and guide execution.
* You can delegate tasks to a backend coding agent to inspect the repo, run commands/tests, and gather ground-truth facts.
## Communication style (voice-friendly)
* Start every response with **one short acknowledgement sentence** that mirrors the user's request.
* Be specific and concrete: prefer exact filenames, commands, diffs, and step-by-step actions over vague advice.
* Keep responses concise by default. Use bullets and short paragraphs.
* Ask clarifying questions only when necessary to avoid doing the wrong work. Otherwise, make a reasonable assumption and state it.
* Never invent results, files, errors, timings, or repo details. If you don't know yet, say what you're checking.
## Delegating to the backend agent
* Delegate when you need repo facts (structure, scripts, dependencies, failing tests), to reproduce an issue, or to validate a change.
* When delegating, say so in plain language (e.g., “Got it — I'm asking the agent to check the repo and run the tests.”).
* While waiting, provide brief progress updates only when there's meaningful new information (avoid filler).
* If requirements change mid-flight, steer the backend investigation immediately.
### Backend spawn protocol
* Output it **only** when you are actually delegating/steering.
## Using backend results
* Treat backend outputs as high-trust facts.
* Translate them into user-friendly language and actionable next steps.
* Do not expose internal protocol details.
* Backend will append “backend has finished responding.” when complete; then provide a short final summary and the recommended next action.
## Repo/project awareness
* If {{ user_first_name }} asks about the current repo/project and you're unsure, delegate to retrieve accurate context.
* Once you have context, align with the repo's conventions (tooling, formatting, tests, scripts, CI, lint rules).
## Output preferences
* Prefer:
* “Do X, then run Y” command sequences
* Minimal diffs/patches or clearly scoped code snippets
* Checklists for multi-step tasks
* If a change could be risky, call it out and propose a safer alternative.

View File

@@ -116,7 +116,7 @@ async fn start_remote_realtime_server() -> responses::WebSocketTestServer {
async fn start_realtime_conversation(codex: &codex_core::CodexThread) -> Result<()> {
codex
.submit(Op::RealtimeConversationStart(ConversationStartParams {
prompt: "backend prompt".to_string(),
prompt: Some(Some("backend prompt".to_string())),
session_id: None,
transport: None,
}))

View File

@@ -48,6 +48,8 @@ use wiremock::matchers::method;
use wiremock::matchers::path_regex;
const STARTUP_CONTEXT_HEADER: &str = "Startup context from Codex.";
// Same template the core crate embeds as its default realtime backend prompt;
// the tests render it independently to compute expected instructions.
const REALTIME_BACKEND_PROMPT: &str = include_str!("../../templates/realtime/backend_prompt.md");
// Placeholder token the template expects to have substituted with a name.
const USER_FIRST_NAME_PLACEHOLDER: &str = "{{ user_first_name }}";
const MEMORY_PROMPT_PHRASE: &str =
"You have access to a memory folder with guidance from prior runs.";
const REALTIME_CONVERSATION_TEST_SUBPROCESS_ENV_VAR: &str =
@@ -101,6 +103,20 @@ fn websocket_request_instructions(
.map(str::to_owned)
}
/// Renders the prompt the backend should receive when a start request omits
/// the prompt: the bundled template, trailing whitespace stripped, with the
/// user-name placeholder substituted the same way core does it.
fn expected_realtime_backend_prompt() -> String {
    let template = REALTIME_BACKEND_PROMPT.trim_end();
    template.replace(USER_FIRST_NAME_PLACEHOLDER, test_user_first_name().as_str())
}
/// Mirrors core's first-name resolution (real name, then login name, first
/// whitespace-separated token, "there" as the final fallback) so the test's
/// expected prompt matches what core renders on this machine.
fn test_user_first_name() -> String {
    let candidates = [whoami::realname(), whoami::username()];
    candidates
        .into_iter()
        .flat_map(|name| name.split_whitespace().next().map(ToOwned::to_owned))
        .next()
        .unwrap_or_else(|| "there".to_string())
}
async fn wait_for_matching_websocket_request<F>(
server: &core_test_support::responses::WebSocketTestServer,
description: &str,
@@ -224,7 +240,7 @@ async fn conversation_start_audio_text_close_round_trip() -> Result<()> {
test.codex
.submit(Op::RealtimeConversationStart(ConversationStartParams {
prompt: "backend prompt".to_string(),
prompt: Some(Some("backend prompt".to_string())),
session_id: None,
transport: None,
}))
@@ -375,7 +391,7 @@ async fn conversation_webrtc_start_posts_generated_session() -> Result<()> {
test.codex
.submit(Op::RealtimeConversationStart(ConversationStartParams {
prompt: "backend prompt".to_string(),
prompt: Some(Some("backend prompt".to_string())),
session_id: None,
transport: Some(ConversationStartTransport::Webrtc {
sdp: "v=offer\r\n".to_string(),
@@ -511,7 +527,7 @@ async fn conversation_start_uses_openai_env_key_fallback_with_chatgpt_auth() ->
test.codex
.submit(Op::RealtimeConversationStart(ConversationStartParams {
prompt: "backend prompt".to_string(),
prompt: Some(Some("backend prompt".to_string())),
session_id: None,
transport: None,
}))
@@ -571,7 +587,7 @@ async fn conversation_transport_close_emits_closed_event() -> Result<()> {
test.codex
.submit(Op::RealtimeConversationStart(ConversationStartParams {
prompt: "backend prompt".to_string(),
prompt: Some(Some("backend prompt".to_string())),
session_id: None,
transport: None,
}))
@@ -655,7 +671,7 @@ async fn conversation_start_preflight_failure_emits_realtime_error_only() -> Res
test.codex
.submit(Op::RealtimeConversationStart(ConversationStartParams {
prompt: "backend prompt".to_string(),
prompt: Some(Some("backend prompt".to_string())),
session_id: None,
transport: None,
}))
@@ -696,7 +712,7 @@ async fn conversation_start_connect_failure_emits_realtime_error_only() -> Resul
test.codex
.submit(Op::RealtimeConversationStart(ConversationStartParams {
prompt: "backend prompt".to_string(),
prompt: Some(Some("backend prompt".to_string())),
session_id: None,
transport: None,
}))
@@ -785,7 +801,7 @@ async fn conversation_second_start_replaces_runtime() -> Result<()> {
test.codex
.submit(Op::RealtimeConversationStart(ConversationStartParams {
prompt: "old".to_string(),
prompt: Some(Some("old".to_string())),
session_id: Some("conv_old".to_string()),
transport: None,
}))
@@ -802,7 +818,7 @@ async fn conversation_second_start_replaces_runtime() -> Result<()> {
test.codex
.submit(Op::RealtimeConversationStart(ConversationStartParams {
prompt: "new".to_string(),
prompt: Some(Some("new".to_string())),
session_id: Some("conv_new".to_string()),
transport: None,
}))
@@ -889,7 +905,7 @@ async fn conversation_uses_experimental_realtime_ws_base_url_override() -> Resul
test.codex
.submit(Op::RealtimeConversationStart(ConversationStartParams {
prompt: "backend prompt".to_string(),
prompt: Some(Some("backend prompt".to_string())),
session_id: None,
transport: None,
}))
@@ -919,6 +935,132 @@ async fn conversation_uses_experimental_realtime_ws_base_url_override() -> Resul
Ok(())
}
// Omitting the start prompt entirely must make core render the bundled
// default backend prompt (name placeholder substituted) and append the
// startup context after a blank line.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn conversation_uses_default_realtime_backend_prompt() -> Result<()> {
    skip_if_no_network!(Ok(()));
    // Scripted server: first connection is the build-time handshake, second is
    // the realtime session that replies with session.updated.
    let server = start_websocket_server(vec![
        vec![],
        vec![vec![json!({
            "type": "session.updated",
            "session": { "id": "sess_default", "instructions": "default" }
        })]],
    ])
    .await;
    // Pin the startup context so the expected instructions are deterministic.
    let mut builder = test_codex().with_config(|config| {
        config.experimental_realtime_ws_startup_context =
            Some("controlled startup context".to_string());
    });
    let test = builder.build_with_websocket_server(&server).await?;
    assert!(
        server
            .wait_for_handshakes(/*expected*/ 1, Duration::from_secs(2))
            .await
    );
    // prompt: None == "omitted" (distinct from Some(None) which means null).
    test.codex
        .submit(Op::RealtimeConversationStart(ConversationStartParams {
            prompt: None,
            session_id: None,
            transport: None,
        }))
        .await?;
    // Wait until the realtime session is acknowledged before inspecting
    // captured websocket traffic.
    let session_updated = wait_for_event_match(&test.codex, |msg| match msg {
        EventMsg::RealtimeConversationRealtime(RealtimeConversationRealtimeEvent {
            payload: RealtimeEvent::SessionUpdated { session_id, .. },
        }) => Some(session_id.clone()),
        _ => None,
    })
    .await;
    assert_eq!(session_updated, "sess_default");
    let connections = server.connections();
    assert_eq!(connections.len(), 2);
    // connections[1][0] is the first frame of the realtime connection; its
    // instructions must be: rendered default template + "\n\n" + context.
    let instructions =
        websocket_request_instructions(&connections[1][0]).expect("default session instructions");
    assert_eq!(
        instructions,
        format!(
            "{}\n\ncontrolled startup context",
            expected_realtime_backend_prompt()
        )
    );
    server.shutdown().await;
    Ok(())
}
// An explicit null prompt (Some(None)) and an explicit empty prompt
// (Some(Some(""))) must both send empty instructions — neither may fall back
// to the default template. Startup context is disabled via an empty override
// so instructions stay exactly "".
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn conversation_uses_empty_instructions_for_null_or_empty_prompt() -> Result<()> {
    skip_if_no_network!(Ok(()));
    // Scripted server: handshake connection plus one realtime connection per
    // start below (null prompt, then empty prompt).
    let server = start_websocket_server(vec![
        vec![],
        vec![vec![json!({
            "type": "session.updated",
            "session": { "id": "sess_null", "instructions": "" }
        })]],
        vec![vec![json!({
            "type": "session.updated",
            "session": { "id": "sess_empty", "instructions": "" }
        })]],
    ])
    .await;
    let mut builder = test_codex().with_config(|config| {
        config.experimental_realtime_ws_startup_context = Some(String::new());
    });
    let test = builder.build_with_websocket_server(&server).await?;
    assert!(
        server
            .wait_for_handshakes(/*expected*/ 1, Duration::from_secs(2))
            .await
    );
    // Run both prompt shapes through a full start -> session.updated -> close
    // cycle; each start consumes the next scripted connection in order.
    for (prompt, expected_session_id) in [
        (Some(None), "sess_null"),
        (Some(Some(String::new())), "sess_empty"),
    ] {
        test.codex
            .submit(Op::RealtimeConversationStart(ConversationStartParams {
                prompt,
                session_id: None,
                transport: None,
            }))
            .await?;
        let session_updated = wait_for_event_match(&test.codex, |msg| match msg {
            EventMsg::RealtimeConversationRealtime(RealtimeConversationRealtimeEvent {
                payload: RealtimeEvent::SessionUpdated { session_id, .. },
            }) => Some(session_id.clone()),
            _ => None,
        })
        .await;
        assert_eq!(session_updated, expected_session_id);
        // Close and wait for the closed event so the next iteration starts a
        // fresh connection rather than reusing this one.
        test.codex.submit(Op::RealtimeConversationClose).await?;
        let _closed = wait_for_event_match(&test.codex, |msg| match msg {
            EventMsg::RealtimeConversationClosed(closed) => Some(closed.clone()),
            _ => None,
        })
        .await;
    }
    let connections = server.connections();
    assert_eq!(connections.len(), 3);
    // First frame of each realtime connection must carry empty instructions.
    let null_instructions =
        websocket_request_instructions(&connections[1][0]).expect("null prompt instructions");
    let empty_instructions =
        websocket_request_instructions(&connections[2][0]).expect("empty prompt instructions");
    assert_eq!(null_instructions, "");
    assert_eq!(empty_instructions, "");
    server.shutdown().await;
    Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn conversation_uses_experimental_realtime_ws_backend_prompt_override() -> Result<()> {
skip_if_no_network!(Ok(()));
@@ -944,7 +1086,7 @@ async fn conversation_uses_experimental_realtime_ws_backend_prompt_override() ->
test.codex
.submit(Op::RealtimeConversationStart(ConversationStartParams {
prompt: "prompt from op".to_string(),
prompt: Some(Some("prompt from op".to_string())),
session_id: None,
transport: None,
}))
@@ -1007,7 +1149,7 @@ async fn conversation_uses_experimental_realtime_ws_startup_context_override() -
test.codex
.submit(Op::RealtimeConversationStart(ConversationStartParams {
prompt: "prompt from op".to_string(),
prompt: Some(Some("prompt from op".to_string())),
session_id: None,
transport: None,
}))
@@ -1068,7 +1210,7 @@ async fn conversation_disables_realtime_startup_context_with_empty_override() ->
test.codex
.submit(Op::RealtimeConversationStart(ConversationStartParams {
prompt: "prompt from op".to_string(),
prompt: Some(Some("prompt from op".to_string())),
session_id: None,
transport: None,
}))
@@ -1122,7 +1264,7 @@ async fn conversation_start_injects_startup_context_from_thread_history() -> Res
test.codex
.submit(Op::RealtimeConversationStart(ConversationStartParams {
prompt: "backend prompt".to_string(),
prompt: Some(Some("backend prompt".to_string())),
session_id: None,
transport: None,
}))
@@ -1176,7 +1318,7 @@ async fn conversation_startup_context_falls_back_to_workspace_map() -> Result<()
test.codex
.submit(Op::RealtimeConversationStart(ConversationStartParams {
prompt: "backend prompt".to_string(),
prompt: Some(Some("backend prompt".to_string())),
session_id: None,
transport: None,
}))
@@ -1228,7 +1370,7 @@ async fn conversation_startup_context_is_truncated_and_sent_once_per_start() ->
test.codex
.submit(Op::RealtimeConversationStart(ConversationStartParams {
prompt: "backend prompt".to_string(),
prompt: Some(Some("backend prompt".to_string())),
session_id: None,
transport: None,
}))
@@ -1313,7 +1455,7 @@ async fn conversation_mirrors_assistant_message_text_to_realtime_handoff() -> Re
test.codex
.submit(Op::RealtimeConversationStart(ConversationStartParams {
prompt: "backend prompt".to_string(),
prompt: Some(Some("backend prompt".to_string())),
session_id: None,
transport: None,
}))
@@ -1440,7 +1582,7 @@ async fn conversation_handoff_persists_across_item_done_until_turn_complete() ->
test.codex
.submit(Op::RealtimeConversationStart(ConversationStartParams {
prompt: "backend prompt".to_string(),
prompt: Some(Some("backend prompt".to_string())),
session_id: None,
transport: None,
}))
@@ -1582,7 +1724,7 @@ async fn inbound_handoff_request_starts_turn() -> Result<()> {
test.codex
.submit(Op::RealtimeConversationStart(ConversationStartParams {
prompt: "backend prompt".to_string(),
prompt: Some(Some("backend prompt".to_string())),
session_id: None,
transport: None,
}))
@@ -1677,7 +1819,7 @@ async fn inbound_handoff_request_uses_active_transcript() -> Result<()> {
test.codex
.submit(Op::RealtimeConversationStart(ConversationStartParams {
prompt: "backend prompt".to_string(),
prompt: Some(Some("backend prompt".to_string())),
session_id: None,
transport: None,
}))
@@ -1770,7 +1912,7 @@ async fn inbound_handoff_request_clears_active_transcript_after_each_handoff() -
test.codex
.submit(Op::RealtimeConversationStart(ConversationStartParams {
prompt: "backend prompt".to_string(),
prompt: Some(Some("backend prompt".to_string())),
session_id: None,
transport: None,
}))
@@ -1870,7 +2012,7 @@ async fn inbound_conversation_item_does_not_start_turn_and_still_forwards_audio(
test.codex
.submit(Op::RealtimeConversationStart(ConversationStartParams {
prompt: "backend prompt".to_string(),
prompt: Some(Some("backend prompt".to_string())),
session_id: None,
transport: None,
}))
@@ -1983,7 +2125,7 @@ async fn delegated_turn_user_role_echo_does_not_redelegate_and_still_forwards_au
test.codex
.submit(Op::RealtimeConversationStart(ConversationStartParams {
prompt: "backend prompt".to_string(),
prompt: Some(Some("backend prompt".to_string())),
session_id: None,
transport: None,
}))
@@ -2126,7 +2268,7 @@ async fn inbound_handoff_request_does_not_block_realtime_event_forwarding() -> R
test.codex
.submit(Op::RealtimeConversationStart(ConversationStartParams {
prompt: "backend prompt".to_string(),
prompt: Some(Some("backend prompt".to_string())),
session_id: None,
transport: None,
}))
@@ -2253,7 +2395,7 @@ async fn inbound_handoff_request_steers_active_turn() -> Result<()> {
test.codex
.submit(Op::RealtimeConversationStart(ConversationStartParams {
prompt: "backend prompt".to_string(),
prompt: Some(Some("backend prompt".to_string())),
session_id: None,
transport: None,
}))
@@ -2395,7 +2537,7 @@ async fn inbound_handoff_request_starts_turn_and_does_not_block_realtime_audio()
test.codex
.submit(Op::RealtimeConversationStart(ConversationStartParams {
prompt: "backend prompt".to_string(),
prompt: Some(Some("backend prompt".to_string())),
session_id: None,
transport: None,
}))