Support original-detail metadata on MCP image outputs (#17714)

## Summary
- honor `_meta["codex/imageDetail"] == "original"` on MCP image content
and map it to `detail: "original"` where supported
- strip that detail back out when the active model does not support
original-detail image inputs
- update code-mode `image(...)` to accept individual MCP image blocks
- teach `js_repl` / `codex.emitImage(...)` to preserve the same hint
from raw MCP image outputs
- document the new `_meta` contract and add generic RMCP-backed coverage
across protocol, core, code-mode, and js_repl paths
This commit is contained in:
Curtis 'Fjord' Hawthorne
2026-04-15 14:43:33 -07:00
committed by GitHub
parent 17d94bd1e3
commit 9e2fc31854
20 changed files with 905 additions and 368 deletions

View File

@@ -24,7 +24,7 @@ const EXEC_DESCRIPTION_TEMPLATE: &str = r#"Run JavaScript code to orchestrate/co
- Global helpers:
- `exit()`: Immediately ends the current script successfully (like an early return from the top level).
- `text(value: string | number | boolean | undefined | null)`: Appends a text item. Non-string values are stringified with `JSON.stringify(...)` when possible.
- `image(imageUrlOrItem: string | { image_url: string; detail?: "auto" | "low" | "high" | "original" | null })`: Appends an image item. `image_url` can be an HTTPS URL or a base64-encoded `data:` URL.
- `image(imageUrlOrItem: string | { image_url: string; detail?: "auto" | "low" | "high" | "original" | null } | ImageContent, detail?: "auto" | "low" | "high" | "original" | null)`: Appends an image item. `image_url` can be an HTTPS URL or a base64-encoded `data:` URL. To forward an MCP tool image, pass an individual `ImageContent` block from `result.content`, for example `image(result.content[0])`. MCP image blocks may request original detail with `_meta: { "codex/imageDetail": "original" }`. When provided, the second `detail` argument overrides any detail embedded in the first argument.
- `store(key: string, value: any)`: stores a serializable value under a string key for later `exec` calls in the same session.
- `load(key: string)`: returns the stored value for a string key, or `undefined` if it is missing.
- `notify(value: string | number | boolean | undefined | null)`: immediately injects an extra `custom_tool_call_output` for the current `exec` call. Values are stringified like `text(...)`.

View File

@@ -109,7 +109,20 @@ pub(super) fn image_callback(
} else {
args.get(0)
};
let image_item = match normalize_output_image(scope, value) {
let detail_override = if args.length() < 2 {
None
} else {
let detail = args.get(1);
if detail.is_string() {
Some(detail.to_rust_string_lossy(scope))
} else if detail.is_null() || detail.is_undefined() {
None
} else {
throw_type_error(scope, "image detail must be a string when provided");
return;
}
};
let image_item = match normalize_output_image(scope, value, detail_override) {
Ok(image_item) => image_item,
Err(()) => return,
};

View File

@@ -3,6 +3,9 @@ use serde_json::Value as JsonValue;
use crate::response::FunctionCallOutputContentItem;
use crate::response::ImageDetail;
const IMAGE_HELPER_EXPECTS_MESSAGE: &str = "image expects a non-empty image URL string, an object with image_url and optional detail, or a raw MCP image block";
const CODEX_IMAGE_DETAIL_META_KEY: &str = "codex/imageDetail";
pub(super) fn serialize_output_text(
scope: &mut v8::PinScope<'_, '_>,
value: v8::Local<'_, v8::Value>,
@@ -34,45 +37,25 @@ pub(super) fn serialize_output_text(
pub(super) fn normalize_output_image(
scope: &mut v8::PinScope<'_, '_>,
value: v8::Local<'_, v8::Value>,
detail_override: Option<String>,
) -> Result<FunctionCallOutputContentItem, ()> {
let result = (|| -> Result<FunctionCallOutputContentItem, String> {
let (image_url, detail) = if value.is_string() {
(value.to_rust_string_lossy(scope), None)
} else if value.is_object() && !value.is_array() {
let object = v8::Local::<v8::Object>::try_from(value).map_err(|_| {
"image expects a non-empty image URL string or an object with image_url and optional detail".to_string()
})?;
let image_url_key = v8::String::new(scope, "image_url")
.ok_or_else(|| "failed to allocate image helper keys".to_string())?;
let detail_key = v8::String::new(scope, "detail")
.ok_or_else(|| "failed to allocate image helper keys".to_string())?;
let image_url = object
.get(scope, image_url_key.into())
.filter(|value| value.is_string())
.map(|value| value.to_rust_string_lossy(scope))
.ok_or_else(|| {
"image expects a non-empty image URL string or an object with image_url and optional detail"
.to_string()
})?;
let detail = match object.get(scope, detail_key.into()) {
Some(value) if value.is_string() => Some(value.to_rust_string_lossy(scope)),
Some(value) if value.is_null() || value.is_undefined() => None,
Some(_) => return Err("image detail must be a string when provided".to_string()),
None => None,
};
(image_url, detail)
let object = v8::Local::<v8::Object>::try_from(value)
.map_err(|_| IMAGE_HELPER_EXPECTS_MESSAGE.to_string())?;
if let Some(image) = parse_non_mcp_output_image(scope, object)? {
image
} else {
parse_mcp_output_image(scope, value)?
}
} else {
return Err(
"image expects a non-empty image URL string or an object with image_url and optional detail"
.to_string(),
);
return Err(IMAGE_HELPER_EXPECTS_MESSAGE.to_string());
};
if image_url.is_empty() {
return Err(
"image expects a non-empty image URL string or an object with image_url and optional detail"
.to_string(),
);
return Err(IMAGE_HELPER_EXPECTS_MESSAGE.to_string());
}
let lower = image_url.to_ascii_lowercase();
if !(lower.starts_with("http://")
@@ -82,6 +65,7 @@ pub(super) fn normalize_output_image(
return Err("image expects an http(s) or data URL".to_string());
}
let detail = detail_override.or(detail);
let detail = match detail {
Some(detail) => {
let normalized = detail.to_ascii_lowercase();
@@ -112,6 +96,86 @@ pub(super) fn normalize_output_image(
}
}
/// Attempts to read `object` as the helper's own `{ image_url, detail? }` shape.
///
/// Returns `Ok(None)` when looking up `image_url` yields nothing or `undefined`,
/// which signals that the object is not in this shape. On success returns the
/// URL string together with the optional `detail` string; neither is validated
/// here.
///
/// # Errors
///
/// - a V8 string for a property key could not be allocated,
/// - `image_url` is present but is not a string,
/// - `detail` is rejected by `parse_image_detail_value`.
fn parse_non_mcp_output_image(
    scope: &mut v8::PinScope<'_, '_>,
    object: v8::Local<'_, v8::Object>,
) -> Result<Option<(String, Option<String>)>, String> {
    let url_key = v8::String::new(scope, "image_url")
        .ok_or_else(|| "failed to allocate image helper keys".to_string())?;
    let url_value = match object.get(scope, url_key.into()) {
        Some(found) if !found.is_undefined() => found,
        _ => return Ok(None),
    };
    if !url_value.is_string() {
        return Err(IMAGE_HELPER_EXPECTS_MESSAGE.to_string());
    }

    let detail_key = v8::String::new(scope, "detail")
        .ok_or_else(|| "failed to allocate image helper keys".to_string())?;
    let raw_detail = object.get(scope, detail_key.into());
    let detail = parse_image_detail_value(scope, raw_detail)?;

    let image_url = url_value.to_rust_string_lossy(scope);
    Ok(Some((image_url, detail)))
}
/// Converts a raw MCP image content block into an `(image_url, detail)` pair.
///
/// `value` is converted to JSON with `v8_value_to_json` and must be an object
/// whose `type` is `"image"` and whose `data` is a non-empty string.
///
/// - If `data` already begins with `data:` (ASCII case-insensitive) it is used
///   verbatim as the image URL.
/// - Otherwise it is wrapped as `data:<mime>;base64,<data>`. The MIME type is
///   read from `mimeType`, or from `mime_type` when `mimeType` is absent; a
///   missing, non-string, or empty value falls back to
///   `application/octet-stream`.
///
/// The returned detail is `Some("original")` only when
/// `_meta["codex/imageDetail"]` is exactly the string `"original"`; any other
/// value is ignored and yields `None`.
///
/// # Errors
///
/// - the value does not convert to a JSON object, or has no string `type`
///   (generic "image expects ..." message),
/// - `type` is a string other than `"image"`,
/// - `data` is missing, not a string, or empty.
fn parse_mcp_output_image(
    scope: &mut v8::PinScope<'_, '_>,
    value: v8::Local<'_, v8::Value>,
) -> Result<(String, Option<String>), String> {
    const DATA_URL_SCHEME: &str = "data:";

    let Some(JsonValue::Object(result)) = v8_value_to_json(scope, value)? else {
        return Err(IMAGE_HELPER_EXPECTS_MESSAGE.to_string());
    };
    let Some(item_type) = result.get("type").and_then(JsonValue::as_str) else {
        return Err(IMAGE_HELPER_EXPECTS_MESSAGE.to_string());
    };
    if item_type != "image" {
        return Err(format!(
            "image only accepts MCP image blocks, got \"{item_type}\""
        ));
    }

    let data = result
        .get("data")
        .and_then(JsonValue::as_str)
        .filter(|data| !data.is_empty())
        .ok_or_else(|| "image expected MCP image data".to_string())?;

    // Inspect only the scheme prefix. Lowercasing the whole string would copy
    // the entire base64 payload just to look at its first five bytes.
    // `str::get` returns `None` for inputs that are too short or that would be
    // split inside a multi-byte character, neither of which can match.
    let is_data_url = data
        .get(..DATA_URL_SCHEME.len())
        .is_some_and(|prefix| prefix.eq_ignore_ascii_case(DATA_URL_SCHEME));
    let image_url = if is_data_url {
        data.to_string()
    } else {
        let mime_type = result
            .get("mimeType")
            .or_else(|| result.get("mime_type"))
            .and_then(JsonValue::as_str)
            .filter(|mime_type| !mime_type.is_empty())
            .unwrap_or("application/octet-stream");
        format!("data:{mime_type};base64,{data}")
    };

    // Only the "original" hint is honored from `_meta`.
    let detail = result
        .get("_meta")
        .and_then(JsonValue::as_object)
        .and_then(|meta| meta.get(CODEX_IMAGE_DETAIL_META_KEY))
        .and_then(JsonValue::as_str)
        .filter(|detail| *detail == "original")
        .map(str::to_string);

    Ok((image_url, detail))
}
/// Interprets an optional V8 value as an image `detail` string.
///
/// A string is returned as `Ok(Some(..))`. An absent value, `null`, or
/// `undefined` yields `Ok(None)`. Any other value is an error.
fn parse_image_detail_value<'s>(
    scope: &mut v8::PinScope<'s, '_>,
    value: Option<v8::Local<'s, v8::Value>>,
) -> Result<Option<String>, String> {
    let Some(candidate) = value else {
        return Ok(None);
    };
    if candidate.is_string() {
        return Ok(Some(candidate.to_rust_string_lossy(scope)));
    }
    if candidate.is_null() || candidate.is_undefined() {
        return Ok(None);
    }
    Err("image detail must be a string when provided".to_string())
}
pub(super) fn v8_value_to_json(
scope: &mut v8::PinScope<'_, '_>,
value: v8::Local<'_, v8::Value>,

View File

@@ -684,6 +684,154 @@ text(JSON.stringify(returnsUndefined));
);
}
/// `image(...)` given a raw MCP image block whose `_meta` requests original
/// detail must emit a single input image: a `data:` URL built from `mimeType`
/// and `data`, with `detail` set to `Original`.
#[tokio::test]
async fn image_helper_accepts_raw_mcp_image_block_with_original_detail() {
    let service = CodeModeService::new();

    let request = ExecuteRequest {
        source: r#"
image({
type: "image",
data: "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR4nGP4z8DwHwAFAAH/iZk9HQAAAABJRU5ErkJggg==",
mimeType: "image/png",
_meta: { "codex/imageDetail": "original" },
});
"#
        .to_string(),
        yield_time_ms: None,
        ..execute_request("")
    };
    let response = service.execute(request).await.unwrap();

    let expected_image = FunctionCallOutputContentItem::InputImage {
        image_url: "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR4nGP4z8DwHwAFAAH/iZk9HQAAAABJRU5ErkJggg==".to_string(),
        detail: Some(crate::ImageDetail::Original),
    };
    let expected = RuntimeResponse::Result {
        cell_id: "1".to_string(),
        content_items: vec![expected_image],
        stored_values: HashMap::new(),
        error_text: None,
    };
    assert_eq!(response, expected);
}
/// When the first argument is an `{ image_url, detail }` object and a second
/// `detail` argument is also passed, the second argument wins: here `"low"`
/// on the object is replaced by `"original"`.
#[tokio::test]
async fn image_helper_second_arg_overrides_explicit_object_detail() {
    let service = CodeModeService::new();

    let request = ExecuteRequest {
        source: r#"
image(
{
image_url: "https://example.com/image.jpg",
detail: "low",
},
"original",
);
"#
        .to_string(),
        yield_time_ms: None,
        ..execute_request("")
    };
    let response = service.execute(request).await.unwrap();

    let expected_image = FunctionCallOutputContentItem::InputImage {
        image_url: "https://example.com/image.jpg".to_string(),
        detail: Some(crate::ImageDetail::Original),
    };
    let expected = RuntimeResponse::Result {
        cell_id: "1".to_string(),
        content_items: vec![expected_image],
        stored_values: HashMap::new(),
        error_text: None,
    };
    assert_eq!(response, expected);
}
/// When the first argument is a raw MCP image block whose `_meta` requests
/// original detail, an explicit second `detail` argument still takes
/// precedence: the emitted image carries `Low`.
#[tokio::test]
async fn image_helper_second_arg_overrides_raw_mcp_image_detail() {
    let service = CodeModeService::new();

    let request = ExecuteRequest {
        source: r#"
image(
{
type: "image",
data: "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR4nGP4z8DwHwAFAAH/iZk9HQAAAABJRU5ErkJggg==",
mimeType: "image/png",
_meta: { "codex/imageDetail": "original" },
},
"low",
);
"#
        .to_string(),
        yield_time_ms: None,
        ..execute_request("")
    };
    let response = service.execute(request).await.unwrap();

    let expected_image = FunctionCallOutputContentItem::InputImage {
        image_url: "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR4nGP4z8DwHwAFAAH/iZk9HQAAAABJRU5ErkJggg==".to_string(),
        detail: Some(crate::ImageDetail::Low),
    };
    let expected = RuntimeResponse::Result {
        cell_id: "1".to_string(),
        content_items: vec![expected_image],
        stored_values: HashMap::new(),
        error_text: None,
    };
    assert_eq!(response, expected);
}
/// Passing the whole MCP tool result (`{ content: [...], isError }`) instead
/// of an individual image block is rejected: no content items are produced
/// and the generic "image expects ..." message is reported as the error text.
#[tokio::test]
async fn image_helper_rejects_raw_mcp_result_container() {
    let service = CodeModeService::new();

    let request = ExecuteRequest {
        source: r#"
image({
content: [
{
type: "image",
data: "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR4nGP4z8DwHwAFAAH/iZk9HQAAAABJRU5ErkJggg==",
mimeType: "image/png",
_meta: { "codex/imageDetail": "original" },
},
],
isError: false,
});
"#
        .to_string(),
        yield_time_ms: None,
        ..execute_request("")
    };
    let response = service.execute(request).await.unwrap();

    let expected_error = "image expects a non-empty image URL string, an object with image_url and optional detail, or a raw MCP image block".to_string();
    let expected = RuntimeResponse::Result {
        cell_id: "1".to_string(),
        content_items: Vec::new(),
        stored_values: HashMap::new(),
        error_text: Some(expected_error),
    };
    assert_eq!(response, expected);
}
#[tokio::test]
async fn terminate_waits_for_runtime_shutdown_before_responding() {
let inner = test_inner();