mirror of
https://github.com/openai/codex.git
synced 2026-04-30 11:21:34 +03:00
Let models opt into original image detail (#14175)
## Summary This PR narrows original image detail handling to a single opt-in feature: - `image_detail_original` lets the model request `detail: "original"` on supported models - Omitting `detail` preserves the default resized behavior The model only sees `detail: "original"` guidance when the active model supports it: - JS REPL instructions include the guidance and examples only on supported models - `view_image` only exposes a `detail` parameter when the feature and model can use it The image detail API is intentionally narrow and consistent across both paths: - `view_image.detail` supports only `"original"`; otherwise omit the field - `codex.emitImage(..., detail)` supports only `"original"`; otherwise omit the field - Unsupported explicit values fail clearly at the API boundary instead of being silently reinterpreted - Unsupported explicit `detail: "original"` requests fall back to normal behavior when the feature is disabled or the model does not support original detail
This commit is contained in:
committed by
GitHub
parent
f548309797
commit
8791f0ab9a
@@ -296,7 +296,8 @@ async fn view_image_tool_attaches_local_image() -> anyhow::Result<()> {
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn view_image_tool_can_preserve_original_resolution_on_gpt5_3_codex() -> anyhow::Result<()> {
|
||||
async fn view_image_tool_can_preserve_original_resolution_when_requested_on_gpt5_3_codex()
|
||||
-> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
@@ -326,7 +327,7 @@ async fn view_image_tool_can_preserve_original_resolution_on_gpt5_3_codex() -> a
|
||||
image.save(&abs_path)?;
|
||||
|
||||
let call_id = "view-image-original";
|
||||
let arguments = serde_json::json!({ "path": rel_path }).to_string();
|
||||
let arguments = serde_json::json!({ "path": rel_path, "detail": "original" }).to_string();
|
||||
|
||||
let first_response = sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
@@ -400,7 +401,191 @@ async fn view_image_tool_can_preserve_original_resolution_on_gpt5_3_codex() -> a
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn view_image_tool_keeps_legacy_behavior_below_gpt5_3_codex() -> anyhow::Result<()> {
|
||||
async fn view_image_tool_errors_clearly_for_unsupported_detail_values() -> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex()
|
||||
.with_model("gpt-5.3-codex")
|
||||
.with_config(|config| {
|
||||
config
|
||||
.features
|
||||
.enable(Feature::ImageDetailOriginal)
|
||||
.expect("test config should allow feature update");
|
||||
});
|
||||
let TestCodex {
|
||||
codex,
|
||||
cwd,
|
||||
session_configured,
|
||||
..
|
||||
} = builder.build(&server).await?;
|
||||
|
||||
let rel_path = "assets/unsupported-detail.png";
|
||||
let abs_path = cwd.path().join(rel_path);
|
||||
if let Some(parent) = abs_path.parent() {
|
||||
std::fs::create_dir_all(parent)?;
|
||||
}
|
||||
let image = ImageBuffer::from_pixel(256, 128, Rgba([0u8, 80, 255, 255]));
|
||||
image.save(&abs_path)?;
|
||||
|
||||
let call_id = "view-image-unsupported-detail";
|
||||
let arguments = serde_json::json!({ "path": rel_path, "detail": "low" }).to_string();
|
||||
|
||||
let first_response = sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_function_call(call_id, "view_image", &arguments),
|
||||
ev_completed("resp-1"),
|
||||
]);
|
||||
responses::mount_sse_once(&server, first_response).await;
|
||||
|
||||
let second_response = sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]);
|
||||
let mock = responses::mount_sse_once(&server, second_response).await;
|
||||
|
||||
let session_model = session_configured.model.clone();
|
||||
|
||||
codex
|
||||
.submit(Op::UserTurn {
|
||||
items: vec![UserInput::Text {
|
||||
text: "please attach the image at low detail".into(),
|
||||
text_elements: Vec::new(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
cwd: cwd.path().to_path_buf(),
|
||||
approval_policy: AskForApproval::Never,
|
||||
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
||||
model: session_model,
|
||||
effort: None,
|
||||
service_tier: None,
|
||||
summary: None,
|
||||
collaboration_mode: None,
|
||||
personality: None,
|
||||
})
|
||||
.await?;
|
||||
|
||||
wait_for_event(&codex, |event| matches!(event, EventMsg::TurnComplete(_))).await;
|
||||
|
||||
let req = mock.single_request();
|
||||
let body_with_tool_output = req.body_json();
|
||||
let output_text = req
|
||||
.function_call_output_content_and_success(call_id)
|
||||
.and_then(|(content, _)| content)
|
||||
.expect("output text present");
|
||||
assert_eq!(
|
||||
output_text,
|
||||
"view_image.detail only supports `original`; omit `detail` for default resized behavior, got `low`"
|
||||
);
|
||||
|
||||
assert!(
|
||||
find_image_message(&body_with_tool_output).is_none(),
|
||||
"unsupported detail values should not produce an input_image message"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn view_image_tool_treats_null_detail_as_omitted() -> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex()
|
||||
.with_model("gpt-5.3-codex")
|
||||
.with_config(|config| {
|
||||
config
|
||||
.features
|
||||
.enable(Feature::ImageDetailOriginal)
|
||||
.expect("test config should allow feature update");
|
||||
});
|
||||
let TestCodex {
|
||||
codex,
|
||||
cwd,
|
||||
session_configured,
|
||||
..
|
||||
} = builder.build(&server).await?;
|
||||
|
||||
let rel_path = "assets/null-detail.png";
|
||||
let abs_path = cwd.path().join(rel_path);
|
||||
if let Some(parent) = abs_path.parent() {
|
||||
std::fs::create_dir_all(parent)?;
|
||||
}
|
||||
let original_width = 2304;
|
||||
let original_height = 864;
|
||||
let image = ImageBuffer::from_pixel(original_width, original_height, Rgba([0u8, 80, 255, 255]));
|
||||
image.save(&abs_path)?;
|
||||
|
||||
let call_id = "view-image-null-detail";
|
||||
let arguments = serde_json::json!({ "path": rel_path, "detail": null }).to_string();
|
||||
|
||||
let first_response = sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_function_call(call_id, "view_image", &arguments),
|
||||
ev_completed("resp-1"),
|
||||
]);
|
||||
responses::mount_sse_once(&server, first_response).await;
|
||||
|
||||
let second_response = sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]);
|
||||
let mock = responses::mount_sse_once(&server, second_response).await;
|
||||
|
||||
let session_model = session_configured.model.clone();
|
||||
|
||||
codex
|
||||
.submit(Op::UserTurn {
|
||||
items: vec![UserInput::Text {
|
||||
text: "please attach the image with a null detail".into(),
|
||||
text_elements: Vec::new(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
cwd: cwd.path().to_path_buf(),
|
||||
approval_policy: AskForApproval::Never,
|
||||
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
||||
model: session_model,
|
||||
effort: None,
|
||||
service_tier: None,
|
||||
summary: None,
|
||||
collaboration_mode: None,
|
||||
personality: None,
|
||||
})
|
||||
.await?;
|
||||
|
||||
wait_for_event(&codex, |event| matches!(event, EventMsg::TurnComplete(_))).await;
|
||||
|
||||
let req = mock.single_request();
|
||||
let function_output = req.function_call_output(call_id);
|
||||
let output_items = function_output
|
||||
.get("output")
|
||||
.and_then(Value::as_array)
|
||||
.expect("function_call_output should be a content item array");
|
||||
assert_eq!(output_items.len(), 1);
|
||||
assert_eq!(output_items[0].get("detail"), None);
|
||||
let image_url = output_items[0]
|
||||
.get("image_url")
|
||||
.and_then(Value::as_str)
|
||||
.expect("image_url present");
|
||||
|
||||
let (_, encoded) = image_url
|
||||
.split_once(',')
|
||||
.expect("image url contains data prefix");
|
||||
let decoded = BASE64_STANDARD
|
||||
.decode(encoded)
|
||||
.expect("image data decodes from base64 for request");
|
||||
let resized = load_from_memory(&decoded).expect("load resized image");
|
||||
let (width, height) = resized.dimensions();
|
||||
assert!(width <= 2048);
|
||||
assert!(height <= 768);
|
||||
assert!(width < original_width);
|
||||
assert!(height < original_height);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn view_image_tool_resizes_when_model_lacks_original_detail_support() -> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
@@ -503,6 +688,110 @@ async fn view_image_tool_keeps_legacy_behavior_below_gpt5_3_codex() -> anyhow::R
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn view_image_tool_does_not_force_original_resolution_with_capability_feature_only()
|
||||
-> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex()
|
||||
.with_model("gpt-5.3-codex")
|
||||
.with_config(|config| {
|
||||
config
|
||||
.features
|
||||
.enable(Feature::ImageDetailOriginal)
|
||||
.expect("test config should allow feature update");
|
||||
});
|
||||
let TestCodex {
|
||||
codex,
|
||||
cwd,
|
||||
session_configured,
|
||||
..
|
||||
} = builder.build(&server).await?;
|
||||
|
||||
let rel_path = "assets/original-example-capability-only.png";
|
||||
let abs_path = cwd.path().join(rel_path);
|
||||
if let Some(parent) = abs_path.parent() {
|
||||
std::fs::create_dir_all(parent)?;
|
||||
}
|
||||
let original_width = 2304;
|
||||
let original_height = 864;
|
||||
let image = ImageBuffer::from_pixel(original_width, original_height, Rgba([0u8, 80, 255, 255]));
|
||||
image.save(&abs_path)?;
|
||||
|
||||
let call_id = "view-image-capability-only";
|
||||
let arguments = serde_json::json!({ "path": rel_path }).to_string();
|
||||
|
||||
let first_response = sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_function_call(call_id, "view_image", &arguments),
|
||||
ev_completed("resp-1"),
|
||||
]);
|
||||
responses::mount_sse_once(&server, first_response).await;
|
||||
|
||||
let second_response = sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]);
|
||||
let mock = responses::mount_sse_once(&server, second_response).await;
|
||||
|
||||
let session_model = session_configured.model.clone();
|
||||
|
||||
codex
|
||||
.submit(Op::UserTurn {
|
||||
items: vec![UserInput::Text {
|
||||
text: "please add the screenshot".into(),
|
||||
text_elements: Vec::new(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
cwd: cwd.path().to_path_buf(),
|
||||
approval_policy: AskForApproval::Never,
|
||||
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
||||
model: session_model,
|
||||
effort: None,
|
||||
service_tier: None,
|
||||
summary: None,
|
||||
collaboration_mode: None,
|
||||
personality: None,
|
||||
})
|
||||
.await?;
|
||||
|
||||
wait_for_event_with_timeout(
|
||||
&codex,
|
||||
|event| matches!(event, EventMsg::TurnComplete(_)),
|
||||
Duration::from_secs(10),
|
||||
)
|
||||
.await;
|
||||
|
||||
let req = mock.single_request();
|
||||
let function_output = req.function_call_output(call_id);
|
||||
let output_items = function_output
|
||||
.get("output")
|
||||
.and_then(Value::as_array)
|
||||
.expect("function_call_output should be a content item array");
|
||||
assert_eq!(output_items.len(), 1);
|
||||
assert_eq!(output_items[0].get("detail"), None);
|
||||
let image_url = output_items[0]
|
||||
.get("image_url")
|
||||
.and_then(Value::as_str)
|
||||
.expect("image_url present");
|
||||
|
||||
let (_, encoded) = image_url
|
||||
.split_once(',')
|
||||
.expect("image url contains data prefix");
|
||||
let decoded = BASE64_STANDARD
|
||||
.decode(encoded)
|
||||
.expect("image data decodes from base64 for request");
|
||||
let resized = load_from_memory(&decoded).expect("load resized image");
|
||||
let (resized_width, resized_height) = resized.dimensions();
|
||||
assert!(resized_width <= 2048);
|
||||
assert!(resized_height <= 768);
|
||||
assert!(resized_width < original_width);
|
||||
assert!(resized_height < original_height);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn js_repl_emit_image_attaches_local_image() -> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
Reference in New Issue
Block a user