mirror of
https://github.com/openai/codex.git
synced 2026-05-01 20:02:05 +03:00
Make js_repl image output controllable (#13331)
## Summary
Instead of always adding the outputs of inner function calls to the model
context, let the JS code decide which ones to return.
- Stop auto-hoisting nested tool outputs from `codex.tool(...)` into the
outer `js_repl` function output.
- Keep `codex.tool(...)` return values unchanged as structured JS
objects.
- Add `codex.emitImage(...)` as the explicit path for attaching an image
to the outer `js_repl` function output.
- Support emitting from a direct image URL, a single `input_image` item,
an explicit `{ bytes, mimeType }` object, or a raw tool response object
containing exactly one image.
- Preserve existing `view_image` original-resolution behavior when JS
emits the raw `view_image` tool result.
- Suppress the special `ViewImageToolCall` event for `js_repl`-sourced
`view_image` calls so nested inspection stays side-effect free until JS
explicitly emits.
- Update the `js_repl` docs and generated project instructions with both
recommended patterns:
- `await codex.emitImage(codex.tool("view_image", { path }))`
- `await codex.emitImage({ bytes: await page.screenshot({ type: "jpeg",
quality: 85 }), mimeType: "image/jpeg" })`
#### [git stack](https://github.com/magus/git-stack-cli)
- ✅ `1` https://github.com/openai/codex/pull/13050
- 👉 `2` https://github.com/openai/codex/pull/13331
- ⏳ `3` https://github.com/openai/codex/pull/13049
This commit is contained in:
committed by
GitHub
parent
1afbbc11c3
commit
c4cb594e73
@@ -21,7 +21,9 @@ use codex_protocol::config_types::ReasoningSummary;
|
||||
use codex_protocol::config_types::Settings;
|
||||
use codex_protocol::config_types::Verbosity;
|
||||
use codex_protocol::models::ContentItem;
|
||||
use codex_protocol::models::FunctionCallOutputContentItem;
|
||||
use codex_protocol::models::FunctionCallOutputPayload;
|
||||
use codex_protocol::models::ImageDetail;
|
||||
use codex_protocol::models::LocalShellAction;
|
||||
use codex_protocol::models::LocalShellExecAction;
|
||||
use codex_protocol::models::LocalShellStatus;
|
||||
@@ -485,6 +487,127 @@ async fn resume_replays_legacy_js_repl_image_rollout_shapes() {
|
||||
assert!(legacy_image_index < new_user_index);
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn resume_replays_image_tool_outputs_with_detail() {
|
||||
skip_if_no_network!();
|
||||
|
||||
let image_url = "data:image/webp;base64,UklGRiIAAABXRUJQVlA4IBYAAAAwAQCdASoBAAEAAUAmJaACdLoB+AADsAD+8ut//NgVzXPv9//S4P0uD9Lg/9KQAAA=";
|
||||
let function_call_id = "view-image-call";
|
||||
let custom_call_id = "js-repl-call";
|
||||
let rollout = vec![
|
||||
RolloutLine {
|
||||
timestamp: "2024-01-01T00:00:00.000Z".to_string(),
|
||||
item: RolloutItem::SessionMeta(SessionMetaLine {
|
||||
meta: SessionMeta {
|
||||
id: ThreadId::default(),
|
||||
timestamp: "2024-01-01T00:00:00Z".to_string(),
|
||||
cwd: ".".into(),
|
||||
originator: "test_originator".to_string(),
|
||||
cli_version: "test_version".to_string(),
|
||||
model_provider: Some("test-provider".to_string()),
|
||||
..Default::default()
|
||||
},
|
||||
git: None,
|
||||
}),
|
||||
},
|
||||
RolloutLine {
|
||||
timestamp: "2024-01-01T00:00:01.000Z".to_string(),
|
||||
item: RolloutItem::ResponseItem(ResponseItem::FunctionCall {
|
||||
id: None,
|
||||
name: "view_image".to_string(),
|
||||
arguments: "{\"path\":\"/tmp/example.webp\"}".to_string(),
|
||||
call_id: function_call_id.to_string(),
|
||||
}),
|
||||
},
|
||||
RolloutLine {
|
||||
timestamp: "2024-01-01T00:00:01.500Z".to_string(),
|
||||
item: RolloutItem::ResponseItem(ResponseItem::FunctionCallOutput {
|
||||
call_id: function_call_id.to_string(),
|
||||
output: FunctionCallOutputPayload::from_content_items(vec![
|
||||
FunctionCallOutputContentItem::InputImage {
|
||||
image_url: image_url.to_string(),
|
||||
detail: Some(ImageDetail::Original),
|
||||
},
|
||||
]),
|
||||
}),
|
||||
},
|
||||
RolloutLine {
|
||||
timestamp: "2024-01-01T00:00:02.000Z".to_string(),
|
||||
item: RolloutItem::ResponseItem(ResponseItem::CustomToolCall {
|
||||
id: None,
|
||||
status: Some("completed".to_string()),
|
||||
call_id: custom_call_id.to_string(),
|
||||
name: "js_repl".to_string(),
|
||||
input: "console.log('image flow')".to_string(),
|
||||
}),
|
||||
},
|
||||
RolloutLine {
|
||||
timestamp: "2024-01-01T00:00:02.500Z".to_string(),
|
||||
item: RolloutItem::ResponseItem(ResponseItem::CustomToolCallOutput {
|
||||
call_id: custom_call_id.to_string(),
|
||||
output: FunctionCallOutputPayload::from_content_items(vec![
|
||||
FunctionCallOutputContentItem::InputImage {
|
||||
image_url: image_url.to_string(),
|
||||
detail: Some(ImageDetail::Original),
|
||||
},
|
||||
]),
|
||||
}),
|
||||
},
|
||||
];
|
||||
|
||||
let tmpdir = TempDir::new().unwrap();
|
||||
let session_path = tmpdir
|
||||
.path()
|
||||
.join("resume-image-tool-outputs-with-detail.jsonl");
|
||||
let mut file = std::fs::File::create(&session_path).unwrap();
|
||||
for line in rollout {
|
||||
writeln!(file, "{}", serde_json::to_string(&line).unwrap()).unwrap();
|
||||
}
|
||||
|
||||
let server = MockServer::start().await;
|
||||
let resp_mock = mount_sse_once(
|
||||
&server,
|
||||
sse(vec![ev_response_created("resp1"), ev_completed("resp1")]),
|
||||
)
|
||||
.await;
|
||||
|
||||
let codex_home = Arc::new(TempDir::new().unwrap());
|
||||
let mut builder = test_codex().with_model("gpt-5.1");
|
||||
let test = builder
|
||||
.resume(&server, codex_home, session_path.clone())
|
||||
.await
|
||||
.expect("resume conversation");
|
||||
test.submit_turn("after resume").await.unwrap();
|
||||
|
||||
let function_output = resp_mock
|
||||
.single_request()
|
||||
.function_call_output(function_call_id);
|
||||
assert_eq!(
|
||||
function_output.get("output"),
|
||||
Some(&serde_json::json!([
|
||||
{
|
||||
"type": "input_image",
|
||||
"image_url": image_url,
|
||||
"detail": "original"
|
||||
}
|
||||
]))
|
||||
);
|
||||
|
||||
let custom_output = resp_mock
|
||||
.single_request()
|
||||
.custom_tool_call_output(custom_call_id);
|
||||
assert_eq!(
|
||||
custom_output.get("output"),
|
||||
Some(&serde_json::json!([
|
||||
{
|
||||
"type": "input_image",
|
||||
"image_url": image_url,
|
||||
"detail": "original"
|
||||
}
|
||||
]))
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn includes_conversation_id_and_model_headers_in_request() {
|
||||
skip_if_no_network!();
|
||||
|
||||
@@ -494,7 +494,7 @@ async fn view_image_tool_keeps_legacy_behavior_below_gpt5_3_codex() -> anyhow::R
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn js_repl_view_image_tool_attaches_local_image() -> anyhow::Result<()> {
|
||||
async fn js_repl_emit_image_attaches_local_image() -> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
@@ -519,7 +519,7 @@ const png = Buffer.from(
|
||||
);
|
||||
await fs.writeFile(imagePath, png);
|
||||
const out = await codex.tool("view_image", { path: imagePath });
|
||||
console.log(out.output?.body?.text ?? "");
|
||||
await codex.emitImage(out);
|
||||
"#;
|
||||
|
||||
let first_response = sse(vec![
|
||||
@@ -555,12 +555,29 @@ console.log(out.output?.body?.text ?? "");
|
||||
})
|
||||
.await?;
|
||||
|
||||
let mut tool_event = None;
|
||||
wait_for_event_with_timeout(
|
||||
&codex,
|
||||
|event| matches!(event, EventMsg::TurnComplete(_)),
|
||||
|event| match event {
|
||||
EventMsg::ViewImageToolCall(_) => {
|
||||
tool_event = Some(event.clone());
|
||||
false
|
||||
}
|
||||
EventMsg::TurnComplete(_) => true,
|
||||
_ => false,
|
||||
},
|
||||
Duration::from_secs(10),
|
||||
)
|
||||
.await;
|
||||
let tool_event = match tool_event {
|
||||
Some(EventMsg::ViewImageToolCall(event)) => event,
|
||||
other => panic!("expected ViewImageToolCall event, got {other:?}"),
|
||||
};
|
||||
assert!(
|
||||
tool_event.path.ends_with("js-repl-view-image.png"),
|
||||
"unexpected image path: {}",
|
||||
tool_event.path.display()
|
||||
);
|
||||
|
||||
let req = mock.single_request();
|
||||
let body = req.body_json();
|
||||
@@ -591,6 +608,105 @@ console.log(out.output?.body?.text ?? "");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn js_repl_view_image_requires_explicit_emit() -> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config.features.enable(Feature::JsRepl);
|
||||
});
|
||||
let TestCodex {
|
||||
codex,
|
||||
cwd,
|
||||
session_configured,
|
||||
..
|
||||
} = builder.build(&server).await?;
|
||||
|
||||
let call_id = "js-repl-view-image-no-emit";
|
||||
let js_input = r#"
|
||||
const fs = await import("node:fs/promises");
|
||||
const path = await import("node:path");
|
||||
const imagePath = path.join(codex.tmpDir, "js-repl-view-image-no-emit.png");
|
||||
const png = Buffer.from(
|
||||
"iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR4nGP4z8DwHwAFAAH/iZk9HQAAAABJRU5ErkJggg==",
|
||||
"base64"
|
||||
);
|
||||
await fs.writeFile(imagePath, png);
|
||||
const out = await codex.tool("view_image", { path: imagePath });
|
||||
console.log(out.type);
|
||||
"#;
|
||||
|
||||
let first_response = sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_custom_tool_call(call_id, "js_repl", js_input),
|
||||
ev_completed("resp-1"),
|
||||
]);
|
||||
responses::mount_sse_once(&server, first_response).await;
|
||||
|
||||
let second_response = sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]);
|
||||
let mock = responses::mount_sse_once(&server, second_response).await;
|
||||
|
||||
let session_model = session_configured.model.clone();
|
||||
codex
|
||||
.submit(Op::UserTurn {
|
||||
items: vec![UserInput::Text {
|
||||
text: "use js_repl to write an image but do not emit it".into(),
|
||||
text_elements: Vec::new(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
cwd: cwd.path().to_path_buf(),
|
||||
approval_policy: AskForApproval::Never,
|
||||
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
||||
model: session_model,
|
||||
effort: None,
|
||||
service_tier: None,
|
||||
summary: None,
|
||||
collaboration_mode: None,
|
||||
personality: None,
|
||||
})
|
||||
.await?;
|
||||
|
||||
let mut tool_event = None;
|
||||
wait_for_event_with_timeout(
|
||||
&codex,
|
||||
|event| match event {
|
||||
EventMsg::ViewImageToolCall(_) => {
|
||||
tool_event = Some(event.clone());
|
||||
false
|
||||
}
|
||||
EventMsg::TurnComplete(_) => true,
|
||||
_ => false,
|
||||
},
|
||||
Duration::from_secs(10),
|
||||
)
|
||||
.await;
|
||||
let tool_event = match tool_event {
|
||||
Some(EventMsg::ViewImageToolCall(event)) => event,
|
||||
other => panic!("expected ViewImageToolCall event, got {other:?}"),
|
||||
};
|
||||
assert!(
|
||||
tool_event.path.ends_with("js-repl-view-image-no-emit.png"),
|
||||
"unexpected image path: {}",
|
||||
tool_event.path.display()
|
||||
);
|
||||
|
||||
let req = mock.single_request();
|
||||
let custom_output = req.custom_tool_call_output(call_id);
|
||||
let output_items = custom_output.get("output").and_then(Value::as_array);
|
||||
assert!(
|
||||
output_items.is_none_or(|items| items
|
||||
.iter()
|
||||
.all(|item| item.get("type").and_then(Value::as_str) != Some("input_image"))),
|
||||
"nested view_image should not auto-populate js_repl output"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn view_image_tool_errors_when_path_is_directory() -> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
Reference in New Issue
Block a user