image-gen-core (#13290)

Core tool-calling for image-gen, handles requesting and receiving logic
for images using response API
This commit is contained in:
Won Park
2026-03-03 23:11:28 -08:00
committed by GitHub
parent 4f6c4bb143
commit fa2306b303
22 changed files with 766 additions and 45 deletions

View File

@@ -23,6 +23,7 @@ use codex_protocol::dynamic_tools::DynamicToolSpec;
use codex_protocol::models::VIEW_IMAGE_TOOL_NAME;
use codex_protocol::openai_models::ApplyPatchToolType;
use codex_protocol::openai_models::ConfigShellToolType;
use codex_protocol::openai_models::InputModality;
use codex_protocol::openai_models::ModelInfo;
use codex_protocol::protocol::SessionSource;
use codex_protocol::protocol::SubAgentSource;
@@ -35,7 +36,6 @@ use std::collections::HashMap;
const SEARCH_TOOL_BM25_DESCRIPTION_TEMPLATE: &str =
include_str!("../../templates/search_tool/tool_description.md");
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum ShellCommandBackendConfig {
Classic,
@@ -49,6 +49,7 @@ pub(crate) struct ToolsConfig {
pub allow_login_shell: bool,
pub apply_patch_tool_type: Option<ApplyPatchToolType>,
pub web_search_mode: Option<WebSearchMode>,
pub image_gen_tool: bool,
pub agent_roles: BTreeMap<String, AgentRoleConfig>,
pub search_tool: bool,
pub request_permission_enabled: bool,
@@ -86,6 +87,8 @@ impl ToolsConfig {
features.enabled(Feature::DefaultModeRequestUserInput);
let include_search_tool = features.enabled(Feature::Apps);
let include_artifact_tools = features.enabled(Feature::Artifact);
let include_image_gen_tool =
features.enabled(Feature::ImageGeneration) && supports_image_generation(model_info);
let include_agent_jobs = include_collab_tools && features.enabled(Feature::Sqlite);
let request_permission_enabled = features.enabled(Feature::RequestPermissions);
let shell_command_backend =
@@ -135,6 +138,7 @@ impl ToolsConfig {
allow_login_shell: true,
apply_patch_tool_type,
web_search_mode: *web_search_mode,
image_gen_tool: include_image_gen_tool,
agent_roles: BTreeMap::new(),
search_tool: include_search_tool,
request_permission_enabled,
@@ -160,6 +164,10 @@ impl ToolsConfig {
}
}
fn supports_image_generation(model_info: &ModelInfo) -> bool {
model_info.input_modalities.contains(&InputModality::Image)
}
/// Generic JSONSchema subset needed for our tool definitions
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(tag = "type", rename_all = "lowercase")]
@@ -1917,6 +1925,10 @@ pub(crate) fn build_specs(
Some(WebSearchMode::Disabled) | None => {}
}
if config.image_gen_tool {
builder.push_spec(ToolSpec::ImageGeneration {});
}
builder.push_spec_with_parallel_support(create_view_image_tool(), true);
builder.register_handler("view_image", view_image_handler);
@@ -1995,6 +2007,7 @@ mod tests {
use crate::models_manager::manager::ModelsManager;
use crate::models_manager::model_info::with_config_overrides;
use crate::tools::registry::ConfiguredToolSpec;
use codex_protocol::openai_models::InputModality;
use codex_protocol::openai_models::ModelInfo;
use codex_protocol::openai_models::ModelsResponse;
use pretty_assertions::assert_eq;
@@ -2047,6 +2060,7 @@ mod tests {
match tool {
ToolSpec::Function(ResponsesApiTool { name, .. }) => name,
ToolSpec::LocalShell {} => "local_shell",
ToolSpec::ImageGeneration {} => "image_generation",
ToolSpec::WebSearch { .. } => "web_search",
ToolSpec::Freeform(FreeformTool { name, .. }) => name,
}
@@ -2125,7 +2139,10 @@ mod tests {
ToolSpec::Function(ResponsesApiTool { parameters, .. }) => {
strip_descriptions_schema(parameters);
}
ToolSpec::Freeform(_) | ToolSpec::LocalShell {} | ToolSpec::WebSearch { .. } => {}
ToolSpec::Freeform(_)
| ToolSpec::LocalShell {}
| ToolSpec::ImageGeneration {}
| ToolSpec::WebSearch { .. } => {}
}
}
@@ -2374,6 +2391,56 @@ mod tests {
assert_contains_tool_names(&tools, &["js_repl", "js_repl_reset"]);
}
#[test]
fn image_generation_tools_require_feature_and_supported_model() {
let config = test_config();
let mut supported_model_info =
ModelsManager::construct_model_info_offline_for_tests("gpt-5.2", &config);
supported_model_info.slug = "custom/gpt-5.2-variant".to_string();
let mut unsupported_model_info = supported_model_info.clone();
unsupported_model_info.input_modalities = vec![InputModality::Text];
let default_features = Features::with_defaults();
let mut image_generation_features = default_features.clone();
image_generation_features.enable(Feature::ImageGeneration);
let default_tools_config = ToolsConfig::new(&ToolsConfigParams {
model_info: &supported_model_info,
features: &default_features,
web_search_mode: Some(WebSearchMode::Cached),
session_source: SessionSource::Cli,
});
let (default_tools, _) = build_specs(&default_tools_config, None, None, &[]).build();
assert!(
!default_tools
.iter()
.any(|tool| tool.spec.name() == "image_generation"),
"image_generation should be disabled by default"
);
let supported_tools_config = ToolsConfig::new(&ToolsConfigParams {
model_info: &supported_model_info,
features: &image_generation_features,
web_search_mode: Some(WebSearchMode::Cached),
session_source: SessionSource::Cli,
});
let (supported_tools, _) = build_specs(&supported_tools_config, None, None, &[]).build();
assert_contains_tool_names(&supported_tools, &["image_generation"]);
let tools_config = ToolsConfig::new(&ToolsConfigParams {
model_info: &unsupported_model_info,
features: &image_generation_features,
web_search_mode: Some(WebSearchMode::Cached),
session_source: SessionSource::Cli,
});
let (tools, _) = build_specs(&tools_config, None, None, &[]).build();
assert!(
!tools
.iter()
.any(|tool| tool.spec.name() == "image_generation"),
"image_generation should be disabled for unsupported models"
);
}
#[test]
fn js_repl_freeform_grammar_blocks_common_non_js_prefixes() {
let ToolSpec::Freeform(FreeformTool { format, .. }) = create_js_repl_tool() else {