code_mode: Move exec params from runtime declarations to @pragma (#14511)

This change moves code_mode exec session settings out of the runtime API and into an optional first-line pragma. Instead of calling runtime helpers like `set_yield_time()` or `set_max_output_tokens_per_exec_call()`, the model can write `// @exec: {"yield_time_ms": ..., "max_output_tokens": ...}` at the top of the freeform exec source. Rust now parses that pragma before building the source, validates it, and passes the values directly in the exec start message to the code-mode broker, which applies them at session start without any worker-runtime mutation path. The `@openai/code_mode` module no longer exposes those setter functions, and the docs and grammar were updated to describe the pragma form. The existing code_mode tests were converted accordingly: output-token budgets and the busy-loop yield timeout are now set through the pragma, while tests that previously called `set_yield_time(10)` mid-script now call `yield_control()` at that point instead.
2026-04-30 11:21:34 +03:00 · 2026-03-12 20:27:42 -07:00
parent 1a363d5fcf
commit 0daffe667a
9 changed files with 235 additions and 99 deletions
--- a/codex-rs/core/tests/suite/code_mode.rs
+++ b/codex-rs/core/tests/suite/code_mode.rs
@@ -28,11 +28,13 @@ use std::time::Instant;
 use wiremock::MockServer;

 fn custom_tool_output_items(req: &ResponsesRequest, call_id: &str) -> Vec<Value> {
-    req.custom_tool_call_output(call_id)
-        .get("output")
-        .and_then(Value::as_array)
-        .expect("custom tool output should be serialized as content items")
-        .clone()
+    match req.custom_tool_call_output(call_id).get("output") {
+        Some(Value::Array(items)) => items.clone(),
+        Some(Value::String(text)) => {
+            vec![serde_json::json!({ "type": "input_text", "text": text })]
+        }
+        _ => panic!("custom tool output should be serialized as text or content items"),
+    }
 }

 fn function_tool_output_items(req: &ResponsesRequest, call_id: &str) -> Vec<Value> {
@@ -332,9 +334,7 @@ async fn code_mode_can_truncate_final_result_with_configured_budget() -> Result<
    let (_test, second_mock) = run_code_mode_turn(
        &server,
        "use exec to truncate the final result",
-        r#"
-set_max_output_tokens_per_exec_call(6);
-
+        r#"// @exec: {"max_output_tokens": 6}
 text(JSON.stringify(await tools.exec_command({
  cmd: "printf 'token one token two token three token four token five token six token seven'",
  max_output_tokens: 100
@@ -427,7 +427,7 @@ async fn code_mode_can_yield_and_resume_with_exec_wait() -> Result<()> {
    let code = format!(
        r#"
 text("phase 1");
-set_yield_time(10);
+yield_control();
 {phase_2_wait}
 text("phase 2");
 {phase_3_wait}
@@ -566,9 +566,8 @@ async fn code_mode_yield_timeout_works_for_busy_loop() -> Result<()> {
    });
    let test = builder.build(&server).await?;

-    let code = r#"
+    let code = r#"// @exec: {"yield_time_ms": 100}
 text("phase 1");
-set_yield_time(10);
 while (true) {}
 "#;

@@ -668,7 +667,7 @@ async fn code_mode_can_run_multiple_yielded_sessions() -> Result<()> {
    let session_a_code = format!(
        r#"
 text("session a start");
-set_yield_time(10);
+yield_control();
 {session_a_wait}
 text("session a done");
 "#
@@ -676,7 +675,7 @@ text("session a done");
    let session_b_code = format!(
        r#"
 text("session b start");
-set_yield_time(10);
+yield_control();
 {session_b_wait}
 text("session b done");
 "#
@@ -834,7 +833,7 @@ async fn code_mode_exec_wait_can_terminate_and_continue() -> Result<()> {
    let code = format!(
        r#"
 text("phase 1");
-set_yield_time(10);
+yield_control();
 {termination_wait}
 text("phase 2");
 "#
@@ -1028,7 +1027,7 @@ async fn code_mode_exec_wait_terminate_returns_completed_session_if_it_finished_
    let session_a_code = format!(
        r#"
 text("session a start");
-set_yield_time(10);
+yield_control();
 {session_a_wait}
 text("session a done");
 await tools.exec_command({{ cmd: {session_a_done_command:?} }});
@@ -1037,7 +1036,7 @@ await tools.exec_command({{ cmd: {session_a_done_command:?} }});
    let session_b_code = format!(
        r#"
 text("session b start");
-set_yield_time(10);
+yield_control();
 {session_b_wait}
 text("session b done");
 "#
@@ -1308,10 +1307,9 @@ async fn code_mode_exec_wait_uses_its_own_max_tokens_budget() -> Result<()> {
    let completion_wait = wait_for_file_source(&completion_gate)?;

    let code = format!(
-        r#"
+        r#"// @exec: {{"max_output_tokens": 100}}
 text("phase 1");
-set_max_output_tokens_per_exec_call(100);
-set_yield_time(10);
+yield_control();
 {completion_wait}
 text("token one token two token three token four token five token six token seven");
 "#
@@ -1630,6 +1628,42 @@ contentLength=0"
    Ok(())
 }

+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn code_mode_exposes_namespaced_mcp_tools_on_global_tools_object() -> Result<()> {
+    skip_if_no_network!(Ok(()));
+
+    let server = responses::start_mock_server().await;
+    let code = r#"
+text(JSON.stringify({
+  hasExecCommand: typeof tools.exec_command === "function",
+  hasNamespacedEcho: typeof tools.mcp__rmcp__echo === "function",
+}));
+"#;
+
+    let (_test, second_mock) =
+        run_code_mode_turn_with_rmcp(&server, "use exec to inspect the global tools object", code)
+            .await?;
+
+    let req = second_mock.single_request();
+    let (output, success) = custom_tool_output_body_and_success(&req, "call-1");
+    assert_ne!(
+        success,
+        Some(false),
+        "exec global tools inspection failed unexpectedly: {output}"
+    );
+
+    let parsed: Value = serde_json::from_str(&output)?;
+    assert_eq!(
+        parsed,
+        serde_json::json!({
+            "hasExecCommand": !cfg!(windows),
+            "hasNamespacedEcho": true,
+        })
+    );
+
+    Ok(())
+}
+
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn code_mode_exposes_normalized_illegal_mcp_tool_names() -> Result<()> {
    skip_if_no_network!(Ok(()));
@@ -1736,6 +1770,7 @@ text(JSON.stringify(Object.getOwnPropertyNames(globalThis).sort()));
        "WeakSet",
        "WebAssembly",
        "__codexContentItems",
+        "add_content",
        "console",
        "decodeURI",
        "decodeURIComponent",
@@ -1750,8 +1785,6 @@ text(JSON.stringify(Object.getOwnPropertyNames(globalThis).sort()));
        "load",
        "parseFloat",
        "parseInt",
-        "set_max_output_tokens_per_exec_call",
-        "set_yield_time",
        "store",
        "text",
        "tools",
@@ -1918,6 +1951,7 @@ structuredContent=null"

    Ok(())
 }
+
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn code_mode_can_store_and_load_values_across_turns() -> Result<()> {
    skip_if_no_network!(Ok(()));