mirror of
https://github.com/openai/codex.git
synced 2026-05-03 21:01:55 +03:00
Support original-detail metadata on MCP image outputs (#17714)
## Summary

- Honor `_meta["codex/imageDetail"] == "original"` on MCP image content and map it to `detail: "original"` where supported.
- Strip that detail back out when the active model does not support original-detail image inputs.
- Update code-mode `image(...)` to accept individual MCP image blocks.
- Teach `js_repl` / `codex.emitImage(...)` to preserve the same hint from raw MCP image outputs.
- Document the new `_meta` contract and add generic RMCP-backed coverage across protocol, core, code-mode, and js_repl paths.
This commit is contained in:
committed by
GitHub
parent
17d94bd1e3
commit
9e2fc31854
@@ -24,7 +24,7 @@ const EXEC_DESCRIPTION_TEMPLATE: &str = r#"Run JavaScript code to orchestrate/co
|
||||
- Global helpers:
|
||||
- `exit()`: Immediately ends the current script successfully (like an early return from the top level).
|
||||
- `text(value: string | number | boolean | undefined | null)`: Appends a text item. Non-string values are stringified with `JSON.stringify(...)` when possible.
|
||||
- `image(imageUrlOrItem: string | { image_url: string; detail?: "auto" | "low" | "high" | "original" | null })`: Appends an image item. `image_url` can be an HTTPS URL or a base64-encoded `data:` URL.
|
||||
- `image(imageUrlOrItem: string | { image_url: string; detail?: "auto" | "low" | "high" | "original" | null } | ImageContent, detail?: "auto" | "low" | "high" | "original" | null)`: Appends an image item. `image_url` can be an HTTPS URL or a base64-encoded `data:` URL. To forward an MCP tool image, pass an individual `ImageContent` block from `result.content`, for example `image(result.content[0])`. MCP image blocks may request original detail with `_meta: { "codex/imageDetail": "original" }`. When provided, the second `detail` argument overrides any detail embedded in the first argument.
|
||||
- `store(key: string, value: any)`: stores a serializable value under a string key for later `exec` calls in the same session.
|
||||
- `load(key: string)`: returns the stored value for a string key, or `undefined` if it is missing.
|
||||
- `notify(value: string | number | boolean | undefined | null)`: immediately injects an extra `custom_tool_call_output` for the current `exec` call. Values are stringified like `text(...)`.
|
||||
|
||||
@@ -109,7 +109,20 @@ pub(super) fn image_callback(
|
||||
} else {
|
||||
args.get(0)
|
||||
};
|
||||
let image_item = match normalize_output_image(scope, value) {
|
||||
let detail_override = if args.length() < 2 {
|
||||
None
|
||||
} else {
|
||||
let detail = args.get(1);
|
||||
if detail.is_string() {
|
||||
Some(detail.to_rust_string_lossy(scope))
|
||||
} else if detail.is_null() || detail.is_undefined() {
|
||||
None
|
||||
} else {
|
||||
throw_type_error(scope, "image detail must be a string when provided");
|
||||
return;
|
||||
}
|
||||
};
|
||||
let image_item = match normalize_output_image(scope, value, detail_override) {
|
||||
Ok(image_item) => image_item,
|
||||
Err(()) => return,
|
||||
};
|
||||
|
||||
@@ -3,6 +3,9 @@ use serde_json::Value as JsonValue;
|
||||
use crate::response::FunctionCallOutputContentItem;
|
||||
use crate::response::ImageDetail;
|
||||
|
||||
const IMAGE_HELPER_EXPECTS_MESSAGE: &str = "image expects a non-empty image URL string, an object with image_url and optional detail, or a raw MCP image block";
|
||||
const CODEX_IMAGE_DETAIL_META_KEY: &str = "codex/imageDetail";
|
||||
|
||||
pub(super) fn serialize_output_text(
|
||||
scope: &mut v8::PinScope<'_, '_>,
|
||||
value: v8::Local<'_, v8::Value>,
|
||||
@@ -34,45 +37,25 @@ pub(super) fn serialize_output_text(
|
||||
pub(super) fn normalize_output_image(
|
||||
scope: &mut v8::PinScope<'_, '_>,
|
||||
value: v8::Local<'_, v8::Value>,
|
||||
detail_override: Option<String>,
|
||||
) -> Result<FunctionCallOutputContentItem, ()> {
|
||||
let result = (|| -> Result<FunctionCallOutputContentItem, String> {
|
||||
let (image_url, detail) = if value.is_string() {
|
||||
(value.to_rust_string_lossy(scope), None)
|
||||
} else if value.is_object() && !value.is_array() {
|
||||
let object = v8::Local::<v8::Object>::try_from(value).map_err(|_| {
|
||||
"image expects a non-empty image URL string or an object with image_url and optional detail".to_string()
|
||||
})?;
|
||||
let image_url_key = v8::String::new(scope, "image_url")
|
||||
.ok_or_else(|| "failed to allocate image helper keys".to_string())?;
|
||||
let detail_key = v8::String::new(scope, "detail")
|
||||
.ok_or_else(|| "failed to allocate image helper keys".to_string())?;
|
||||
let image_url = object
|
||||
.get(scope, image_url_key.into())
|
||||
.filter(|value| value.is_string())
|
||||
.map(|value| value.to_rust_string_lossy(scope))
|
||||
.ok_or_else(|| {
|
||||
"image expects a non-empty image URL string or an object with image_url and optional detail"
|
||||
.to_string()
|
||||
})?;
|
||||
let detail = match object.get(scope, detail_key.into()) {
|
||||
Some(value) if value.is_string() => Some(value.to_rust_string_lossy(scope)),
|
||||
Some(value) if value.is_null() || value.is_undefined() => None,
|
||||
Some(_) => return Err("image detail must be a string when provided".to_string()),
|
||||
None => None,
|
||||
};
|
||||
(image_url, detail)
|
||||
let object = v8::Local::<v8::Object>::try_from(value)
|
||||
.map_err(|_| IMAGE_HELPER_EXPECTS_MESSAGE.to_string())?;
|
||||
if let Some(image) = parse_non_mcp_output_image(scope, object)? {
|
||||
image
|
||||
} else {
|
||||
parse_mcp_output_image(scope, value)?
|
||||
}
|
||||
} else {
|
||||
return Err(
|
||||
"image expects a non-empty image URL string or an object with image_url and optional detail"
|
||||
.to_string(),
|
||||
);
|
||||
return Err(IMAGE_HELPER_EXPECTS_MESSAGE.to_string());
|
||||
};
|
||||
|
||||
if image_url.is_empty() {
|
||||
return Err(
|
||||
"image expects a non-empty image URL string or an object with image_url and optional detail"
|
||||
.to_string(),
|
||||
);
|
||||
return Err(IMAGE_HELPER_EXPECTS_MESSAGE.to_string());
|
||||
}
|
||||
let lower = image_url.to_ascii_lowercase();
|
||||
if !(lower.starts_with("http://")
|
||||
@@ -82,6 +65,7 @@ pub(super) fn normalize_output_image(
|
||||
return Err("image expects an http(s) or data URL".to_string());
|
||||
}
|
||||
|
||||
let detail = detail_override.or(detail);
|
||||
let detail = match detail {
|
||||
Some(detail) => {
|
||||
let normalized = detail.to_ascii_lowercase();
|
||||
@@ -112,6 +96,86 @@ pub(super) fn normalize_output_image(
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_non_mcp_output_image(
|
||||
scope: &mut v8::PinScope<'_, '_>,
|
||||
object: v8::Local<'_, v8::Object>,
|
||||
) -> Result<Option<(String, Option<String>)>, String> {
|
||||
let image_url_key = v8::String::new(scope, "image_url")
|
||||
.ok_or_else(|| "failed to allocate image helper keys".to_string())?;
|
||||
let Some(image_url) = object.get(scope, image_url_key.into()) else {
|
||||
return Ok(None);
|
||||
};
|
||||
if image_url.is_undefined() {
|
||||
return Ok(None);
|
||||
}
|
||||
if !image_url.is_string() {
|
||||
return Err(IMAGE_HELPER_EXPECTS_MESSAGE.to_string());
|
||||
}
|
||||
let detail_key = v8::String::new(scope, "detail")
|
||||
.ok_or_else(|| "failed to allocate image helper keys".to_string())?;
|
||||
let detail = parse_image_detail_value(scope, object.get(scope, detail_key.into()))?;
|
||||
Ok(Some((image_url.to_rust_string_lossy(scope), detail)))
|
||||
}
|
||||
|
||||
fn parse_mcp_output_image(
|
||||
scope: &mut v8::PinScope<'_, '_>,
|
||||
value: v8::Local<'_, v8::Value>,
|
||||
) -> Result<(String, Option<String>), String> {
|
||||
let Some(result) = v8_value_to_json(scope, value)? else {
|
||||
return Err(IMAGE_HELPER_EXPECTS_MESSAGE.to_string());
|
||||
};
|
||||
let JsonValue::Object(result) = result else {
|
||||
return Err(IMAGE_HELPER_EXPECTS_MESSAGE.to_string());
|
||||
};
|
||||
let Some(item_type) = result.get("type").and_then(JsonValue::as_str) else {
|
||||
return Err(IMAGE_HELPER_EXPECTS_MESSAGE.to_string());
|
||||
};
|
||||
if item_type != "image" {
|
||||
return Err(format!(
|
||||
"image only accepts MCP image blocks, got \"{item_type}\""
|
||||
));
|
||||
}
|
||||
let data = result
|
||||
.get("data")
|
||||
.and_then(JsonValue::as_str)
|
||||
.ok_or_else(|| "image expected MCP image data".to_string())?;
|
||||
if data.is_empty() {
|
||||
return Err("image expected MCP image data".to_string());
|
||||
}
|
||||
|
||||
let image_url = if data.to_ascii_lowercase().starts_with("data:") {
|
||||
data.to_string()
|
||||
} else {
|
||||
let mime_type = result
|
||||
.get("mimeType")
|
||||
.or_else(|| result.get("mime_type"))
|
||||
.and_then(JsonValue::as_str)
|
||||
.filter(|mime_type| !mime_type.is_empty())
|
||||
.unwrap_or("application/octet-stream");
|
||||
format!("data:{mime_type};base64,{data}")
|
||||
};
|
||||
let detail = result
|
||||
.get("_meta")
|
||||
.and_then(JsonValue::as_object)
|
||||
.and_then(|meta| meta.get(CODEX_IMAGE_DETAIL_META_KEY))
|
||||
.and_then(JsonValue::as_str)
|
||||
.filter(|detail| *detail == "original")
|
||||
.map(str::to_string);
|
||||
Ok((image_url, detail))
|
||||
}
|
||||
|
||||
fn parse_image_detail_value<'s>(
|
||||
scope: &mut v8::PinScope<'s, '_>,
|
||||
value: Option<v8::Local<'s, v8::Value>>,
|
||||
) -> Result<Option<String>, String> {
|
||||
match value {
|
||||
Some(value) if value.is_string() => Ok(Some(value.to_rust_string_lossy(scope))),
|
||||
Some(value) if value.is_null() || value.is_undefined() => Ok(None),
|
||||
Some(_) => Err("image detail must be a string when provided".to_string()),
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) fn v8_value_to_json(
|
||||
scope: &mut v8::PinScope<'_, '_>,
|
||||
value: v8::Local<'_, v8::Value>,
|
||||
|
||||
@@ -684,6 +684,154 @@ text(JSON.stringify(returnsUndefined));
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn image_helper_accepts_raw_mcp_image_block_with_original_detail() {
|
||||
let service = CodeModeService::new();
|
||||
|
||||
let response = service
|
||||
.execute(ExecuteRequest {
|
||||
source: r#"
|
||||
image({
|
||||
type: "image",
|
||||
data: "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR4nGP4z8DwHwAFAAH/iZk9HQAAAABJRU5ErkJggg==",
|
||||
mimeType: "image/png",
|
||||
_meta: { "codex/imageDetail": "original" },
|
||||
});
|
||||
"#
|
||||
.to_string(),
|
||||
yield_time_ms: None,
|
||||
..execute_request("")
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
response,
|
||||
RuntimeResponse::Result {
|
||||
cell_id: "1".to_string(),
|
||||
content_items: vec![FunctionCallOutputContentItem::InputImage {
|
||||
image_url: "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR4nGP4z8DwHwAFAAH/iZk9HQAAAABJRU5ErkJggg==".to_string(),
|
||||
detail: Some(crate::ImageDetail::Original),
|
||||
}],
|
||||
stored_values: HashMap::new(),
|
||||
error_text: None,
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn image_helper_second_arg_overrides_explicit_object_detail() {
|
||||
let service = CodeModeService::new();
|
||||
|
||||
let response = service
|
||||
.execute(ExecuteRequest {
|
||||
source: r#"
|
||||
image(
|
||||
{
|
||||
image_url: "https://example.com/image.jpg",
|
||||
detail: "low",
|
||||
},
|
||||
"original",
|
||||
);
|
||||
"#
|
||||
.to_string(),
|
||||
yield_time_ms: None,
|
||||
..execute_request("")
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
response,
|
||||
RuntimeResponse::Result {
|
||||
cell_id: "1".to_string(),
|
||||
content_items: vec![FunctionCallOutputContentItem::InputImage {
|
||||
image_url: "https://example.com/image.jpg".to_string(),
|
||||
detail: Some(crate::ImageDetail::Original),
|
||||
}],
|
||||
stored_values: HashMap::new(),
|
||||
error_text: None,
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn image_helper_second_arg_overrides_raw_mcp_image_detail() {
|
||||
let service = CodeModeService::new();
|
||||
|
||||
let response = service
|
||||
.execute(ExecuteRequest {
|
||||
source: r#"
|
||||
image(
|
||||
{
|
||||
type: "image",
|
||||
data: "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR4nGP4z8DwHwAFAAH/iZk9HQAAAABJRU5ErkJggg==",
|
||||
mimeType: "image/png",
|
||||
_meta: { "codex/imageDetail": "original" },
|
||||
},
|
||||
"low",
|
||||
);
|
||||
"#
|
||||
.to_string(),
|
||||
yield_time_ms: None,
|
||||
..execute_request("")
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
response,
|
||||
RuntimeResponse::Result {
|
||||
cell_id: "1".to_string(),
|
||||
content_items: vec![FunctionCallOutputContentItem::InputImage {
|
||||
image_url: "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR4nGP4z8DwHwAFAAH/iZk9HQAAAABJRU5ErkJggg==".to_string(),
|
||||
detail: Some(crate::ImageDetail::Low),
|
||||
}],
|
||||
stored_values: HashMap::new(),
|
||||
error_text: None,
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn image_helper_rejects_raw_mcp_result_container() {
|
||||
let service = CodeModeService::new();
|
||||
|
||||
let response = service
|
||||
.execute(ExecuteRequest {
|
||||
source: r#"
|
||||
image({
|
||||
content: [
|
||||
{
|
||||
type: "image",
|
||||
data: "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR4nGP4z8DwHwAFAAH/iZk9HQAAAABJRU5ErkJggg==",
|
||||
mimeType: "image/png",
|
||||
_meta: { "codex/imageDetail": "original" },
|
||||
},
|
||||
],
|
||||
isError: false,
|
||||
});
|
||||
"#
|
||||
.to_string(),
|
||||
yield_time_ms: None,
|
||||
..execute_request("")
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
response,
|
||||
RuntimeResponse::Result {
|
||||
cell_id: "1".to_string(),
|
||||
content_items: Vec::new(),
|
||||
stored_values: HashMap::new(),
|
||||
error_text: Some(
|
||||
"image expects a non-empty image URL string, an object with image_url and optional detail, or a raw MCP image block".to_string(),
|
||||
),
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn terminate_waits_for_runtime_shutdown_before_responding() {
|
||||
let inner = test_inner();
|
||||
|
||||
Reference in New Issue
Block a user