Return image URL from view_image tool (#15072)

Cleanup image semantics in code mode.

`view_image` now returns `{image_url:string, details?: string}` 

`image()` now allows both string parameter and `{image_url:string,
details?: string}`
This commit is contained in:
pakrym-oai
2026-03-18 13:58:20 -07:00
committed by GitHub
parent 88e5382fc4
commit 5cada46ddf
13 changed files with 279 additions and 80 deletions

View File

@@ -1,6 +1,8 @@
#![allow(clippy::expect_used, clippy::unwrap_used)]
use anyhow::Result;
use base64::Engine;
use base64::engine::general_purpose::STANDARD as BASE64_STANDARD;
use codex_core::config::types::McpServerConfig;
use codex_core::config::types::McpServerTransportConfig;
use codex_core::features::Feature;
@@ -1761,6 +1763,90 @@ image("data:image/png;base64,AAA");
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn code_mode_can_use_view_image_result_with_image_helper() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = responses::start_mock_server().await;
let mut builder = test_codex()
.with_model("gpt-5.3-codex")
.with_config(move |config| {
let _ = config.features.enable(Feature::CodeMode);
let _ = config.features.enable(Feature::ImageDetailOriginal);
});
let test = builder.build(&server).await?;
let image_bytes = BASE64_STANDARD.decode(
"iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR4nGP4z8DwHwAFAAH/iZk9HQAAAABJRU5ErkJggg==",
)?;
let image_path = test.cwd_path().join("code_mode_view_image.png");
fs::write(&image_path, image_bytes)?;
let image_path_json = serde_json::to_string(&image_path.to_string_lossy().to_string())?;
let code = format!(
r#"
const out = await tools.view_image({{ path: {image_path_json}, detail: "original" }});
image(out);
"#
);
responses::mount_sse_once(
&server,
sse(vec![
ev_response_created("resp-1"),
ev_custom_tool_call("call-1", "exec", &code),
ev_completed("resp-1"),
]),
)
.await;
let second_mock = responses::mount_sse_once(
&server,
sse(vec![
ev_assistant_message("msg-1", "done"),
ev_completed("resp-2"),
]),
)
.await;
test.submit_turn("use exec to call view_image and emit its image output")
.await?;
let req = second_mock.single_request();
let items = custom_tool_output_items(&req, "call-1");
let (_, success) = custom_tool_output_body_and_success(&req, "call-1");
assert_ne!(
success,
Some(false),
"code_mode view_image call failed unexpectedly"
);
assert_eq!(items.len(), 2);
assert_regex_match(
concat!(
r"(?s)\A",
r"Script completed\nWall time \d+\.\d seconds\nOutput:\n\z"
),
text_item(&items, 0),
);
assert_eq!(
items[1].get("type").and_then(Value::as_str),
Some("input_image")
);
let emitted_image_url = items[1]
.get("image_url")
.and_then(Value::as_str)
.expect("image helper should emit an input_image item with image_url");
assert!(emitted_image_url.starts_with("data:image/png;base64,"));
assert_eq!(
items[1].get("detail").and_then(Value::as_str),
Some("original")
);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn code_mode_can_apply_patch_via_nested_tool() -> Result<()> {
skip_if_no_network!(Ok(()));
@@ -2084,7 +2170,7 @@ text(JSON.stringify(tool));
parsed,
serde_json::json!({
"name": "view_image",
"description": "View a local image from the filesystem (only use if given a full filepath by the user, and the image isn't already attached to the thread context within <image ...> tags).\n\nexec tool declaration:\n```ts\ndeclare const tools: { view_image(args: { path: string; }): Promise<unknown>; };\n```",
"description": "View a local image from the filesystem (only use if given a full filepath by the user, and the image isn't already attached to the thread context within <image ...> tags).\n\nexec tool declaration:\n```ts\ndeclare const tools: { view_image(args: { path: string; }): Promise<{ detail: string | null; image_url: string; }>; };\n```",
})
);

View File

@@ -1087,7 +1087,7 @@ async fn view_image_tool_errors_when_path_is_directory() -> anyhow::Result<()> {
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn view_image_tool_placeholder_for_non_image_files() -> anyhow::Result<()> {
async fn view_image_tool_errors_for_non_image_files() -> anyhow::Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
@@ -1150,20 +1150,19 @@ async fn view_image_tool_placeholder_for_non_image_files() -> anyhow::Result<()>
request.inputs_of_type("input_image").is_empty(),
"non-image file should not produce an input_image message"
);
let (placeholder, success) = request
let (error_text, success) = request
.function_call_output_content_and_success(call_id)
.expect("function_call_output should be present");
assert_eq!(success, None);
let placeholder = placeholder.expect("placeholder text present");
let error_text = error_text.expect("error text present");
assert!(
placeholder.contains("Codex could not read the local image at")
&& placeholder.contains("unsupported MIME type `application/json`"),
"placeholder should describe the unsupported file type: {placeholder}"
let expected_error = format!(
"unable to process image at `{}`: unsupported image `application/json`",
abs_path.display()
);
assert!(
placeholder.contains(&abs_path.display().to_string()),
"placeholder should mention path: {placeholder}"
error_text.contains(&expected_error),
"error should describe unsupported file type: {error_text}"
);
Ok(())