mirror of
https://github.com/openai/codex.git
synced 2026-05-06 06:12:59 +03:00
Compare commits
4 Commits
xli-codex/
...
andrey/ren
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b8f8ebb995 | ||
|
|
3d4c524ee2 | ||
|
|
b0ca1e16a3 | ||
|
|
8df0e45f9b |
@@ -6,14 +6,14 @@ use crate::PUBLIC_TOOL_NAME;
|
||||
|
||||
const MAX_JS_SAFE_INTEGER: u64 = (1_u64 << 53) - 1;
|
||||
const CODE_MODE_ONLY_PREFACE: &str =
|
||||
"Use `exec/wait` tool to run all other tools, do not attempt to use any other tools directly";
|
||||
"Use `exec/wait` to call all other functions, do not attempt to call any other functions directly";
|
||||
const EXEC_DESCRIPTION_TEMPLATE: &str = r#"## exec
|
||||
- Runs raw JavaScript in an isolated context (no Node, no file system, or network access, no console).
|
||||
- Send raw JavaScript source text, not JSON, quoted strings, or markdown code fences.
|
||||
- You may optionally start the tool input with a first-line pragma like `// @exec: {"yield_time_ms": 10000, "max_output_tokens": 1000}`.
|
||||
- You may optionally start the exec input with a first-line pragma like `// @exec: {"yield_time_ms": 10000, "max_output_tokens": 1000}`.
|
||||
- `yield_time_ms` asks `exec` to yield early after that many milliseconds if the script is still running.
|
||||
- `max_output_tokens` sets the token budget for direct `exec` results. By default the result is truncated to 10000 tokens.
|
||||
- All nested tools are available on the global `tools` object, for example `await tools.exec_command(...)`. Tool names are exposed as normalized JavaScript identifiers, for example `await tools.mcp__ologs__get_profile(...)`.
|
||||
- All nested functions are available on the global `functions` object, for example `await functions.exec_command(...)`. Tool names are exposed as normalized JavaScript identifiers, for example `await functions.mcp__ologs__get_profile(...)`.
|
||||
- Tool methods take either string or object as parameter.
|
||||
- They return either a structured value or a string based on the description above.
|
||||
|
||||
@@ -24,7 +24,7 @@ const EXEC_DESCRIPTION_TEMPLATE: &str = r#"## exec
|
||||
- `store(key: string, value: any)`: stores a serializable value under a string key for later `exec` calls in the same session.
|
||||
- `load(key: string)`: returns the stored value for a string key, or `undefined` if it is missing.
|
||||
- `notify(value: string | number | boolean | undefined | null)`: immediately injects an extra `custom_tool_call_output` for the current `exec` call. Values are stringified like `text(...)`.
|
||||
- `ALL_TOOLS`: metadata for the enabled nested tools as `{ name, description }` entries.
|
||||
- `ALL_FUNCTIONS`: metadata for the enabled nested functions as `{ name, description }` entries.
|
||||
- `yield_control()`: yields the accumulated output to the model immediately while the script keeps running."#;
|
||||
const WAIT_DESCRIPTION_TEMPLATE: &str = r#"- Use `wait` only after `exec` returns `Script running with cell ID ...`.
|
||||
- `cell_id` identifies the running `exec` cell to resume.
|
||||
@@ -153,7 +153,7 @@ pub fn parse_exec_source(input: &str) -> Result<ParsedExecSource, String> {
|
||||
Ok(args)
|
||||
}
|
||||
|
||||
pub fn is_code_mode_nested_tool(tool_name: &str) -> bool {
|
||||
pub fn is_code_mode_function(tool_name: &str) -> bool {
|
||||
tool_name != crate::PUBLIC_TOOL_NAME && tool_name != crate::WAIT_TOOL_NAME
|
||||
}
|
||||
|
||||
@@ -171,18 +171,18 @@ pub fn build_exec_tool_description(
|
||||
];
|
||||
|
||||
if !enabled_tools.is_empty() {
|
||||
let nested_tool_reference = enabled_tools
|
||||
let function_description = enabled_tools
|
||||
.iter()
|
||||
.map(|(name, nested_description)| {
|
||||
.map(|(name, description)| {
|
||||
let global_name = normalize_code_mode_identifier(name);
|
||||
format!(
|
||||
"### `{global_name}` (`{name}`)\n{}",
|
||||
nested_description.trim()
|
||||
description.trim()
|
||||
)
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n\n");
|
||||
sections.push(nested_tool_reference);
|
||||
sections.push(function_description);
|
||||
}
|
||||
|
||||
sections.join("\n\n")
|
||||
@@ -248,7 +248,7 @@ pub fn append_code_mode_sample(
|
||||
output_type: String,
|
||||
) -> String {
|
||||
let declaration = format!(
|
||||
"declare const tools: {{ {} }};",
|
||||
"declare const functions: {{ {} }};",
|
||||
render_code_mode_tool_declaration(tool_name, input_name, input_type, output_type)
|
||||
);
|
||||
format!("{description}\n\nexec tool declaration:\n```ts\n{declaration}\n```")
|
||||
@@ -538,7 +538,7 @@ mod tests {
|
||||
};
|
||||
|
||||
let description = augment_tool_definition(definition).description;
|
||||
assert!(description.contains("declare const tools"));
|
||||
assert!(description.contains("declare const functions"));
|
||||
assert!(
|
||||
description.contains(
|
||||
"hidden_dynamic_tool(args: { city: string; }): Promise<{ ok: boolean; }>;"
|
||||
@@ -547,7 +547,7 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn code_mode_only_description_includes_nested_tools() {
|
||||
fn code_mode_only_description_includes_code_mode_functions() {
|
||||
let description = build_exec_tool_description(
|
||||
&[("foo".to_string(), "bar".to_string())],
|
||||
/*code_mode_only*/ true,
|
||||
|
||||
@@ -10,7 +10,7 @@ pub use description::append_code_mode_sample;
|
||||
pub use description::augment_tool_definition;
|
||||
pub use description::build_exec_tool_description;
|
||||
pub use description::build_wait_tool_description;
|
||||
pub use description::is_code_mode_nested_tool;
|
||||
pub use description::is_code_mode_function;
|
||||
pub use description::normalize_code_mode_identifier;
|
||||
pub use description::parse_exec_source;
|
||||
pub use description::render_json_schema_to_typescript;
|
||||
|
||||
@@ -16,8 +16,8 @@ pub(super) fn install_globals(scope: &mut v8::PinScope<'_, '_>) -> Result<(), St
|
||||
return Err("failed to remove global `console`".to_string());
|
||||
}
|
||||
|
||||
let tools = build_tools_object(scope)?;
|
||||
let all_tools = build_all_tools_value(scope)?;
|
||||
let functions = build_functions_object(scope)?;
|
||||
let all_functions = build_all_functions_object(scope)?;
|
||||
let text = helper_function(scope, "text", text_callback)?;
|
||||
let image = helper_function(scope, "image", image_callback)?;
|
||||
let store = helper_function(scope, "store", store_callback)?;
|
||||
@@ -26,8 +26,11 @@ pub(super) fn install_globals(scope: &mut v8::PinScope<'_, '_>) -> Result<(), St
|
||||
let yield_control = helper_function(scope, "yield_control", yield_control_callback)?;
|
||||
let exit = helper_function(scope, "exit", exit_callback)?;
|
||||
|
||||
set_global(scope, global, "tools", tools.into())?;
|
||||
set_global(scope, global, "ALL_TOOLS", all_tools)?;
|
||||
// We use the word "tools" everywhere in the codebase to refer to functions the model can call.
|
||||
// The model sees these things as callables in the "functions" namespace. So we use "functions"
|
||||
// as the name of the JS global that holds its callable functions.
|
||||
set_global(scope, global, "functions", functions.into())?;
|
||||
set_global(scope, global, "ALL_FUNCTIONS", all_functions)?;
|
||||
set_global(scope, global, "text", text.into())?;
|
||||
set_global(scope, global, "image", image.into())?;
|
||||
set_global(scope, global, "store", store.into())?;
|
||||
@@ -38,10 +41,10 @@ pub(super) fn install_globals(scope: &mut v8::PinScope<'_, '_>) -> Result<(), St
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn build_tools_object<'s>(
|
||||
fn build_functions_object<'s>(
|
||||
scope: &mut v8::PinScope<'s, '_>,
|
||||
) -> Result<v8::Local<'s, v8::Object>, String> {
|
||||
let tools = v8::Object::new(scope);
|
||||
let functions = v8::Object::new(scope);
|
||||
let enabled_tools = scope
|
||||
.get_slot::<RuntimeState>()
|
||||
.map(|state| state.enabled_tools.clone())
|
||||
@@ -49,14 +52,14 @@ fn build_tools_object<'s>(
|
||||
|
||||
for tool in enabled_tools {
|
||||
let name = v8::String::new(scope, &tool.global_name)
|
||||
.ok_or_else(|| "failed to allocate tool name".to_string())?;
|
||||
.ok_or_else(|| "failed to allocate tool name in code mode functions object".to_string())?;
|
||||
let function = tool_function(scope, &tool.tool_name)?;
|
||||
tools.set(scope, name.into(), function.into());
|
||||
functions.set(scope, name.into(), function.into());
|
||||
}
|
||||
Ok(tools)
|
||||
Ok(functions)
|
||||
}
|
||||
|
||||
fn build_all_tools_value<'s>(
|
||||
fn build_all_functions_object<'s>(
|
||||
scope: &mut v8::PinScope<'s, '_>,
|
||||
) -> Result<v8::Local<'s, v8::Value>, String> {
|
||||
let enabled_tools = scope
|
||||
@@ -65,25 +68,25 @@ fn build_all_tools_value<'s>(
|
||||
.unwrap_or_default();
|
||||
let array = v8::Array::new(scope, enabled_tools.len() as i32);
|
||||
let name_key = v8::String::new(scope, "name")
|
||||
.ok_or_else(|| "failed to allocate ALL_TOOLS name key".to_string())?;
|
||||
.ok_or_else(|| "failed to allocate ALL_FUNCTIONS name key".to_string())?;
|
||||
let description_key = v8::String::new(scope, "description")
|
||||
.ok_or_else(|| "failed to allocate ALL_TOOLS description key".to_string())?;
|
||||
.ok_or_else(|| "failed to allocate ALL_FUNCTIONS description key".to_string())?;
|
||||
|
||||
for (index, tool) in enabled_tools.iter().enumerate() {
|
||||
let item = v8::Object::new(scope);
|
||||
let name = v8::String::new(scope, &tool.global_name)
|
||||
.ok_or_else(|| "failed to allocate ALL_TOOLS name".to_string())?;
|
||||
.ok_or_else(|| "failed to allocate ALL_FUNCTIONS name".to_string())?;
|
||||
let description = v8::String::new(scope, &tool.description)
|
||||
.ok_or_else(|| "failed to allocate ALL_TOOLS description".to_string())?;
|
||||
.ok_or_else(|| "failed to allocate ALL_FUNCTIONS description".to_string())?;
|
||||
|
||||
if item.set(scope, name_key.into(), name.into()) != Some(true) {
|
||||
return Err("failed to set ALL_TOOLS name".to_string());
|
||||
return Err("failed to set ALL_FUNCTIONS name".to_string());
|
||||
}
|
||||
if item.set(scope, description_key.into(), description.into()) != Some(true) {
|
||||
return Err("failed to set ALL_TOOLS description".to_string());
|
||||
return Err("failed to set ALL_FUNCTIONS description".to_string());
|
||||
}
|
||||
if array.set_index(scope, index as u32, item.into()) != Some(true) {
|
||||
return Err("failed to append ALL_TOOLS metadata".to_string());
|
||||
return Err("failed to append ALL_FUNCTIONS metadata".to_string());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -118,7 +118,7 @@ impl CodeModeTurnHost for CoreTurnHost {
|
||||
input: Option<JsonValue>,
|
||||
cancellation_token: CancellationToken,
|
||||
) -> Result<JsonValue, String> {
|
||||
call_nested_tool(
|
||||
call_code_mode_function(
|
||||
self.exec.clone(),
|
||||
self.tool_runtime.clone(),
|
||||
tool_name,
|
||||
@@ -257,7 +257,7 @@ pub(super) async fn build_enabled_tools(
|
||||
|
||||
fn enabled_tool_from_spec(spec: ToolSpec) -> Option<codex_code_mode::ToolDefinition> {
|
||||
let tool_name = spec.name().to_string();
|
||||
if !codex_code_mode::is_code_mode_nested_tool(&tool_name) {
|
||||
if !codex_code_mode::is_code_mode_function(&tool_name) {
|
||||
return None;
|
||||
}
|
||||
|
||||
@@ -284,7 +284,7 @@ fn enabled_tool_from_spec(spec: ToolSpec) -> Option<codex_code_mode::ToolDefinit
|
||||
}
|
||||
|
||||
async fn build_nested_router(exec: &ExecContext) -> ToolRouter {
|
||||
let nested_tools_config = exec.turn.tools_config.for_code_mode_nested_tools();
|
||||
let code_mode_functions_config = exec.turn.tools_config.for_code_mode_nested_tools();
|
||||
let mcp_tools = exec
|
||||
.session
|
||||
.services
|
||||
@@ -298,7 +298,7 @@ async fn build_nested_router(exec: &ExecContext) -> ToolRouter {
|
||||
.collect();
|
||||
|
||||
ToolRouter::from_config(
|
||||
&nested_tools_config,
|
||||
&code_mode_functions_config,
|
||||
ToolRouterParams {
|
||||
mcp_tools: Some(mcp_tools),
|
||||
app_tools: None,
|
||||
@@ -308,7 +308,7 @@ async fn build_nested_router(exec: &ExecContext) -> ToolRouter {
|
||||
)
|
||||
}
|
||||
|
||||
async fn call_nested_tool(
|
||||
async fn call_code_mode_function(
|
||||
exec: ExecContext,
|
||||
tool_runtime: ToolCallRuntime,
|
||||
tool_name: String,
|
||||
@@ -332,7 +332,7 @@ async fn call_nested_tool(
|
||||
Err(error) => return Err(FunctionCallError::RespondToModel(error)),
|
||||
}
|
||||
} else {
|
||||
match build_nested_tool_payload(tool_runtime.find_spec(&tool_name), &tool_name, input) {
|
||||
match build_code_mode_function_call_payload(tool_runtime.find_spec(&tool_name), &tool_name, input) {
|
||||
Ok(payload) => payload,
|
||||
Err(error) => return Err(FunctionCallError::RespondToModel(error)),
|
||||
}
|
||||
@@ -367,7 +367,7 @@ fn tool_kind_for_name(
|
||||
.ok_or_else(|| format!("tool `{tool_name}` is not enabled in {PUBLIC_TOOL_NAME}"))
|
||||
}
|
||||
|
||||
fn build_nested_tool_payload(
|
||||
fn build_code_mode_function_call_payload(
|
||||
spec: Option<ToolSpec>,
|
||||
tool_name: &str,
|
||||
input: Option<JsonValue>,
|
||||
|
||||
@@ -85,7 +85,7 @@ fn append_code_mode_sample_uses_global_tools_for_valid_identifiers() {
|
||||
"{ foo: string }".to_string(),
|
||||
"unknown".to_string(),
|
||||
),
|
||||
"desc\n\nexec tool declaration:\n```ts\ndeclare const tools: { mcp__ologs__get_profile(args: { foo: string }): Promise<unknown>; };\n```"
|
||||
"desc\n\nexec tool declaration:\n```ts\ndeclare const functions: { mcp__ologs__get_profile(args: { foo: string }): Promise<unknown>; };\n```"
|
||||
);
|
||||
}
|
||||
|
||||
@@ -99,6 +99,6 @@ fn append_code_mode_sample_normalizes_invalid_identifiers() {
|
||||
"{ foo: string }".to_string(),
|
||||
"unknown".to_string(),
|
||||
),
|
||||
"desc\n\nexec tool declaration:\n```ts\ndeclare const tools: { mcp__rmcp__echo_tool(args: { foo: string }): Promise<unknown>; };\n```"
|
||||
"desc\n\nexec tool declaration:\n```ts\ndeclare const functions: { mcp__rmcp__echo_tool(args: { foo: string }): Promise<unknown>; };\n```"
|
||||
);
|
||||
}
|
||||
|
||||
@@ -66,7 +66,7 @@ impl ToolRouter {
|
||||
specs
|
||||
.iter()
|
||||
.filter_map(|configured_tool| {
|
||||
if !codex_code_mode::is_code_mode_nested_tool(configured_tool.spec.name()) {
|
||||
if !codex_code_mode::is_code_mode_function(configured_tool.spec.name()) {
|
||||
Some(configured_tool.spec.clone())
|
||||
} else {
|
||||
None
|
||||
|
||||
@@ -546,7 +546,7 @@ impl ToolsConfig {
|
||||
self
|
||||
}
|
||||
|
||||
pub fn for_code_mode_nested_tools(&self) -> Self {
|
||||
pub fn for_code_mode_functions(&self) -> Self {
|
||||
let mut nested = self.clone();
|
||||
nested.code_mode_enabled = false;
|
||||
nested.code_mode_only_enabled = false;
|
||||
@@ -2460,7 +2460,7 @@ pub(crate) fn build_specs_with_discoverable_tools(
|
||||
let exec_permission_approvals_enabled = config.exec_permission_approvals_enabled;
|
||||
|
||||
if config.code_mode_enabled {
|
||||
let nested_config = config.for_code_mode_nested_tools();
|
||||
let nested_config = config.for_code_mode_functions();
|
||||
let (nested_specs, _) = build_specs_with_discoverable_tools(
|
||||
&nested_config,
|
||||
mcp_tools.clone(),
|
||||
@@ -2479,7 +2479,7 @@ pub(crate) fn build_specs_with_discoverable_tools(
|
||||
ToolSpec::Freeform(tool) => (tool.name, tool.description),
|
||||
_ => return None,
|
||||
};
|
||||
codex_code_mode::is_code_mode_nested_tool(&name).then_some((name, description))
|
||||
codex_code_mode::is_code_mode_function(&name).then_some((name, description))
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
enabled_tools.sort_by(|left, right| left.0.cmp(&right.0));
|
||||
|
||||
@@ -2967,7 +2967,7 @@ fn code_mode_augments_builtin_tool_descriptions_with_typed_sample() {
|
||||
|
||||
assert_eq!(
|
||||
description,
|
||||
"View a local image from the filesystem (only use if given a full filepath by the user, and the image isn't already attached to the thread context within <image ...> tags).\n\nexec tool declaration:\n```ts\ndeclare const tools: { view_image(args: { path: string; }): Promise<{ detail: string | null; image_url: string; }>; };\n```"
|
||||
"View a local image from the filesystem (only use if given a full filepath by the user, and the image isn't already attached to the thread context within <image ...> tags).\n\nexec tool declaration:\n```ts\ndeclare const functions: { view_image(args: { path: string; }): Promise<{ detail: string | null; image_url: string; }>; };\n```"
|
||||
);
|
||||
}
|
||||
|
||||
@@ -3019,7 +3019,7 @@ fn code_mode_augments_mcp_tool_descriptions_with_namespaced_sample() {
|
||||
|
||||
assert_eq!(
|
||||
description,
|
||||
"Echo text\n\nexec tool declaration:\n```ts\ndeclare const tools: { mcp__sample__echo(args: { message: string; }): Promise<{ _meta?: unknown; content: Array<unknown>; isError?: boolean; structuredContent?: unknown; }>; };\n```"
|
||||
"Echo text\n\nexec tool declaration:\n```ts\ndeclare const functions: { mcp__sample__echo(args: { message: string; }): Promise<{ _meta?: unknown; content: Array<unknown>; isError?: boolean; structuredContent?: unknown; }>; };\n```"
|
||||
);
|
||||
}
|
||||
|
||||
@@ -3038,7 +3038,7 @@ fn code_mode_only_restricts_model_tools_to_exec_tools() {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn code_mode_only_exec_description_includes_full_nested_tool_details() {
|
||||
fn code_mode_only_exec_description_includes_full_details_for_code_mode_functions() {
|
||||
let config = test_config();
|
||||
let model_info = ModelsManager::construct_model_info_offline_for_tests("gpt-5-codex", &config);
|
||||
let mut features = Features::with_defaults();
|
||||
@@ -3067,7 +3067,7 @@ fn code_mode_only_exec_description_includes_full_nested_tool_details() {
|
||||
panic!("expected freeform tool");
|
||||
};
|
||||
|
||||
assert!(!description.contains("Enabled nested tools:"));
|
||||
assert!(!description.contains("Enabled functions:"));
|
||||
assert!(!description.contains("Nested tool reference:"));
|
||||
assert!(description.starts_with(
|
||||
"Use `exec/wait` tool to run all other tools, do not attempt to use any other tools directly"
|
||||
@@ -3077,7 +3077,7 @@ fn code_mode_only_exec_description_includes_full_nested_tool_details() {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn code_mode_exec_description_omits_nested_tool_details_when_not_code_mode_only() {
|
||||
fn code_mode_exec_description_omits_details_for_code_mode_functions_when_not_code_mode_only() {
|
||||
let config = test_config();
|
||||
let model_info = ModelsManager::construct_model_info_offline_for_tests("gpt-5-codex", &config);
|
||||
let mut features = Features::with_defaults();
|
||||
|
||||
@@ -50,7 +50,7 @@ fn custom_tool_output_items(req: &ResponsesRequest, call_id: &str) -> Vec<Value>
|
||||
}
|
||||
|
||||
fn tool_names(body: &Value) -> Vec<String> {
|
||||
body.get("tools")
|
||||
body.get("functions")
|
||||
.and_then(Value::as_array)
|
||||
.map(|tools| {
|
||||
tools
|
||||
@@ -94,7 +94,7 @@ fn wait_for_file_source(path: &Path) -> Result<String> {
|
||||
let quoted_path = shlex::try_join([path.to_string_lossy().as_ref()])?;
|
||||
let command = format!("if [ -f {quoted_path} ]; then printf ready; fi");
|
||||
Ok(format!(
|
||||
r#"while ((await tools.exec_command({{ cmd: {command:?} }})).output !== "ready") {{
|
||||
r#"while ((await functions.exec_command({{ cmd: {command:?} }})).output !== "ready") {{
|
||||
}}"#
|
||||
))
|
||||
}
|
||||
@@ -250,7 +250,7 @@ async fn code_mode_can_return_exec_command_output() -> Result<()> {
|
||||
&server,
|
||||
"use exec to run exec_command",
|
||||
r#"
|
||||
text(JSON.stringify(await tools.exec_command({ cmd: "printf code_mode_exec_marker" })));
|
||||
text(JSON.stringify(await functions.exec_command({ cmd: "printf code_mode_exec_marker" })));
|
||||
"#,
|
||||
/*include_apply_patch*/ false,
|
||||
)
|
||||
@@ -303,7 +303,7 @@ async fn code_mode_only_restricts_prompt_tools() -> Result<()> {
|
||||
let _ = config.features.enable(Feature::CodeModeOnly);
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
test.submit_turn("list tools in code mode only").await?;
|
||||
test.submit_turn("list functions in code mode only").await?;
|
||||
|
||||
let first_body = resp_mock.single_request().body_json();
|
||||
assert_eq!(
|
||||
@@ -316,7 +316,7 @@ async fn code_mode_only_restricts_prompt_tools() -> Result<()> {
|
||||
|
||||
#[cfg_attr(windows, ignore = "no exec_command on Windows")]
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn code_mode_only_can_call_nested_tools() -> Result<()> {
|
||||
async fn code_mode_only_can_call_functions() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = responses::start_mock_server().await;
|
||||
@@ -328,7 +328,7 @@ async fn code_mode_only_can_call_nested_tools() -> Result<()> {
|
||||
"call-1",
|
||||
"exec",
|
||||
r#"
|
||||
const output = await tools.exec_command({ cmd: "printf code_mode_only_nested_tool_marker" });
|
||||
const output = await functions.exec_command({ cmd: "printf code_mode_only_function_call_marker" });
|
||||
text(output.output);
|
||||
"#,
|
||||
),
|
||||
@@ -349,7 +349,7 @@ text(output.output);
|
||||
let _ = config.features.enable(Feature::CodeModeOnly);
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
test.submit_turn("use exec to run nested tool in code mode only")
|
||||
test.submit_turn("use exec to run function in code mode only")
|
||||
.await?;
|
||||
|
||||
let request = follow_up_mock.single_request();
|
||||
@@ -357,15 +357,15 @@ text(output.output);
|
||||
assert_ne!(
|
||||
success,
|
||||
Some(false),
|
||||
"code_mode_only nested tool call failed unexpectedly: {output}"
|
||||
"code_mode_only function call failed unexpectedly: {output}"
|
||||
);
|
||||
assert_eq!(output, "code_mode_only_nested_tool_marker");
|
||||
assert_eq!(output, "code_mode_only_function_call_marker");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn code_mode_update_plan_nested_tool_result_is_empty_object() -> Result<()> {
|
||||
async fn code_mode_update_plan_function_call_result_is_empty_object() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = responses::start_mock_server().await;
|
||||
@@ -373,7 +373,7 @@ async fn code_mode_update_plan_nested_tool_result_is_empty_object() -> Result<()
|
||||
&server,
|
||||
"use exec to run update_plan",
|
||||
r#"
|
||||
const result = await tools.update_plan({
|
||||
const result = await functions.update_plan({
|
||||
plan: [{ step: "Run update_plan from code mode", status: "in_progress" }],
|
||||
});
|
||||
text(JSON.stringify(result));
|
||||
@@ -398,7 +398,7 @@ text(JSON.stringify(result));
|
||||
|
||||
#[cfg_attr(windows, ignore = "flaky on windows")]
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn code_mode_nested_tool_calls_can_run_in_parallel() -> Result<()> {
|
||||
async fn code_mode_function_call_calls_can_run_in_parallel() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = responses::start_mock_server().await;
|
||||
@@ -420,8 +420,8 @@ const args = {
|
||||
};
|
||||
|
||||
await Promise.all([
|
||||
tools.test_sync_tool(args),
|
||||
tools.test_sync_tool(args),
|
||||
functions.test_sync_tool(args),
|
||||
functions.test_sync_tool(args),
|
||||
]);
|
||||
"#;
|
||||
let code = r#"
|
||||
@@ -435,8 +435,8 @@ const args = {
|
||||
};
|
||||
|
||||
const results = await Promise.all([
|
||||
tools.test_sync_tool(args),
|
||||
tools.test_sync_tool(args),
|
||||
functions.test_sync_tool(args),
|
||||
functions.test_sync_tool(args),
|
||||
]);
|
||||
|
||||
text(JSON.stringify(results));
|
||||
@@ -467,15 +467,15 @@ text(JSON.stringify(results));
|
||||
)
|
||||
.await;
|
||||
|
||||
test.submit_turn("warm up nested tools in parallel").await?;
|
||||
test.submit_turn("warm up nested functions in parallel").await?;
|
||||
|
||||
let start = Instant::now();
|
||||
test.submit_turn("run nested tools in parallel").await?;
|
||||
test.submit_turn("run nested functions in parallel").await?;
|
||||
let duration = start.elapsed();
|
||||
|
||||
assert!(
|
||||
duration < Duration::from_millis(1_600),
|
||||
"expected nested tools to finish in parallel, got {duration:?}",
|
||||
"expected nested functions to finish in parallel, got {duration:?}",
|
||||
);
|
||||
|
||||
let req = response_mock
|
||||
@@ -498,7 +498,7 @@ async fn code_mode_can_truncate_final_result_with_configured_budget() -> Result<
|
||||
&server,
|
||||
"use exec to truncate the final result",
|
||||
r#"// @exec: {"max_output_tokens": 6}
|
||||
text(JSON.stringify(await tools.exec_command({
|
||||
text(JSON.stringify(await functions.exec_command({
|
||||
cmd: "printf 'token one token two token three token four token five token six token seven'",
|
||||
max_output_tokens: 100
|
||||
})));
|
||||
@@ -580,10 +580,10 @@ async fn code_mode_exec_surfaces_handler_errors_as_exceptions() -> Result<()> {
|
||||
let server = responses::start_mock_server().await;
|
||||
let (_test, second_mock) = run_code_mode_turn(
|
||||
&server,
|
||||
"surface nested tool handler failures as script exceptions",
|
||||
"surface function call handler failures as script exceptions",
|
||||
r#"
|
||||
try {
|
||||
await tools.exec_command({});
|
||||
await functions.exec_command({});
|
||||
text("no-exception");
|
||||
} catch (error) {
|
||||
text(`caught:${error?.message ?? String(error)}`);
|
||||
@@ -598,7 +598,7 @@ try {
|
||||
assert_ne!(
|
||||
success,
|
||||
Some(false),
|
||||
"script should catch the nested tool error: {output}"
|
||||
"script should catch the tool error: {output}"
|
||||
);
|
||||
assert!(
|
||||
output.contains("caught:"),
|
||||
@@ -606,7 +606,7 @@ try {
|
||||
);
|
||||
assert!(
|
||||
!output.contains("no-exception"),
|
||||
"nested tool error should not allow success path: {output}"
|
||||
"tool error should not allow success path: {output}"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
@@ -1233,7 +1233,7 @@ text("session a start");
|
||||
yield_control();
|
||||
{session_a_wait}
|
||||
text("session a done");
|
||||
await tools.exec_command({{ cmd: {session_a_done_command:?} }});
|
||||
await functions.exec_command({{ cmd: {session_a_done_command:?} }});
|
||||
"#
|
||||
);
|
||||
let session_b_code = format!(
|
||||
@@ -1422,7 +1422,7 @@ async fn code_mode_background_keeps_running_on_later_turn_without_wait() -> Resu
|
||||
r#"
|
||||
text("before yield");
|
||||
yield_control();
|
||||
await tools.exec_command({{ cmd: {write_file_command:?} }});
|
||||
await functions.exec_command({{ cmd: {write_file_command:?} }});
|
||||
text("after yield");
|
||||
"#
|
||||
);
|
||||
@@ -1636,7 +1636,7 @@ async fn code_mode_notify_injects_additional_exec_tool_output_into_active_contex
|
||||
"use exec notify helper",
|
||||
r#"
|
||||
notify("code_mode_notify_marker");
|
||||
await tools.test_sync_tool({});
|
||||
await functions.test_sync_tool({});
|
||||
text("done");
|
||||
"#,
|
||||
/*include_apply_patch*/ false,
|
||||
@@ -1816,7 +1816,7 @@ async fn code_mode_can_use_view_image_result_with_image_helper() -> Result<()> {
|
||||
let image_path_json = serde_json::to_string(&image_path.to_string_lossy().to_string())?;
|
||||
let code = format!(
|
||||
r#"
|
||||
const out = await tools.view_image({{ path: {image_path_json}, detail: "original" }});
|
||||
const out = await functions.view_image({{ path: {image_path_json}, detail: "original" }});
|
||||
image(out);
|
||||
"#
|
||||
);
|
||||
@@ -1879,7 +1879,7 @@ image(out);
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn code_mode_can_apply_patch_via_nested_tool() -> Result<()> {
|
||||
async fn code_mode_can_apply_patch_via_function_call() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = responses::start_mock_server().await;
|
||||
@@ -1887,7 +1887,7 @@ async fn code_mode_can_apply_patch_via_nested_tool() -> Result<()> {
|
||||
let patch = format!(
|
||||
"*** Begin Patch\n*** Add File: {file_name}\n+hello from code_mode\n*** End Patch\n"
|
||||
);
|
||||
let code = format!("text(await tools.apply_patch({patch:?}));\n");
|
||||
let code = format!("text(await functions.apply_patch({patch:?}));\n");
|
||||
|
||||
let (test, second_mock) = run_code_mode_turn(
|
||||
&server,
|
||||
@@ -1929,7 +1929,7 @@ async fn code_mode_can_print_structured_mcp_tool_result_fields() -> Result<()> {
|
||||
|
||||
let server = responses::start_mock_server().await;
|
||||
let code = r#"
|
||||
const { content, structuredContent, isError } = await tools.mcp__rmcp__echo({
|
||||
const { content, structuredContent, isError } = await functions.mcp__rmcp__echo({
|
||||
message: "ping",
|
||||
});
|
||||
text(
|
||||
@@ -1962,17 +1962,17 @@ contentLength=0"
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn code_mode_exposes_mcp_tools_on_global_tools_object() -> Result<()> {
|
||||
async fn code_mode_exposes_mcp_tools_on_global_functions_object() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = responses::start_mock_server().await;
|
||||
let code = r#"
|
||||
const { content, structuredContent, isError } = await tools.mcp__rmcp__echo({
|
||||
const { content, structuredContent, isError } = await functions.mcp__rmcp__echo({
|
||||
message: "ping",
|
||||
});
|
||||
text(
|
||||
`hasEcho=${String(Object.keys(tools).includes("mcp__rmcp__echo"))}\n` +
|
||||
`echoType=${typeof tools.mcp__rmcp__echo}\n` +
|
||||
`hasEcho=${String(Object.keys(functions).includes("mcp__rmcp__echo"))}\n` +
|
||||
`echoType=${typeof functions.mcp__rmcp__echo}\n` +
|
||||
`echo=${structuredContent?.echo ?? "missing"}\n` +
|
||||
`isError=${String(isError)}\n` +
|
||||
`contentLength=${content.length}`
|
||||
@@ -1980,7 +1980,7 @@ text(
|
||||
"#;
|
||||
|
||||
let (_test, second_mock) =
|
||||
run_code_mode_turn_with_rmcp(&server, "use exec to inspect the global tools object", code)
|
||||
run_code_mode_turn_with_rmcp(&server, "use exec to inspect the global functions object", code)
|
||||
.await?;
|
||||
|
||||
let req = second_mock.single_request();
|
||||
@@ -2003,19 +2003,19 @@ contentLength=0"
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn code_mode_exposes_namespaced_mcp_tools_on_global_tools_object() -> Result<()> {
|
||||
async fn code_mode_exposes_namespaced_mcp_tools_on_global_functions_object() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = responses::start_mock_server().await;
|
||||
let code = r#"
|
||||
text(JSON.stringify({
|
||||
hasExecCommand: typeof tools.exec_command === "function",
|
||||
hasNamespacedEcho: typeof tools.mcp__rmcp__echo === "function",
|
||||
hasExecCommand: typeof functions.exec_command === "function",
|
||||
hasNamespacedEcho: typeof functions.mcp__rmcp__echo === "function",
|
||||
}));
|
||||
"#;
|
||||
|
||||
let (_test, second_mock) =
|
||||
run_code_mode_turn_with_rmcp(&server, "use exec to inspect the global tools object", code)
|
||||
run_code_mode_turn_with_rmcp(&server, "use exec to inspect the global functions object", code)
|
||||
.await?;
|
||||
|
||||
let req = second_mock.single_request();
|
||||
@@ -2023,7 +2023,7 @@ text(JSON.stringify({
|
||||
assert_ne!(
|
||||
success,
|
||||
Some(false),
|
||||
"exec global tools inspection failed unexpectedly: {output}"
|
||||
"exec global functions inspection failed unexpectedly: {output}"
|
||||
);
|
||||
|
||||
let parsed: Value = serde_json::from_str(&output)?;
|
||||
@@ -2044,7 +2044,7 @@ async fn code_mode_exposes_normalized_illegal_mcp_tool_names() -> Result<()> {
|
||||
|
||||
let server = responses::start_mock_server().await;
|
||||
let code = r#"
|
||||
const result = await tools.mcp__rmcp__echo_tool({ message: "ping" });
|
||||
const result = await functions.mcp__rmcp__echo_tool({ message: "ping" });
|
||||
text(`echo=${result.structuredContent.echo}`);
|
||||
"#;
|
||||
|
||||
@@ -2090,7 +2090,7 @@ text(JSON.stringify(Object.getOwnPropertyNames(globalThis).sort()));
|
||||
let globals = globals.into_iter().collect::<HashSet<_>>();
|
||||
let expected = [
|
||||
"AggregateError",
|
||||
"ALL_TOOLS",
|
||||
"ALL_FUNCTIONS",
|
||||
"Array",
|
||||
"ArrayBuffer",
|
||||
"AsyncDisposableStack",
|
||||
@@ -2153,6 +2153,7 @@ text(JSON.stringify(Object.getOwnPropertyNames(globalThis).sort()));
|
||||
"escape",
|
||||
"exit",
|
||||
"eval",
|
||||
"functions",
|
||||
"globalThis",
|
||||
"image",
|
||||
"isFinite",
|
||||
@@ -2163,7 +2164,6 @@ text(JSON.stringify(Object.getOwnPropertyNames(globalThis).sort()));
|
||||
"parseInt",
|
||||
"store",
|
||||
"text",
|
||||
"tools",
|
||||
"undefined",
|
||||
"unescape",
|
||||
"yield_control",
|
||||
@@ -2184,13 +2184,13 @@ async fn code_mode_exports_all_tools_metadata_for_builtin_tools() -> Result<()>
|
||||
|
||||
let server = responses::start_mock_server().await;
|
||||
let code = r#"
|
||||
const tool = ALL_TOOLS.find(({ name }) => name === "view_image");
|
||||
const tool = ALL_FUNCTIONS.find(({ name }) => name === "view_image");
|
||||
text(JSON.stringify(tool));
|
||||
"#;
|
||||
|
||||
let (_test, second_mock) = run_code_mode_turn(
|
||||
&server,
|
||||
"use exec to inspect ALL_TOOLS",
|
||||
"use exec to inspect ALL_FUNCTIONS",
|
||||
code,
|
||||
/*include_apply_patch*/ false,
|
||||
)
|
||||
@@ -2201,18 +2201,18 @@ text(JSON.stringify(tool));
|
||||
assert_ne!(
|
||||
success,
|
||||
Some(false),
|
||||
"exec ALL_TOOLS lookup failed unexpectedly: {output}"
|
||||
"exec ALL_FUNCTIONS lookup failed unexpectedly: {output}"
|
||||
);
|
||||
|
||||
let parsed: Value = serde_json::from_str(
|
||||
&custom_tool_output_last_non_empty_text(&req, "call-1")
|
||||
.expect("exec ALL_TOOLS lookup should emit JSON"),
|
||||
.expect("exec ALL_FUNCTIONS lookup should emit JSON"),
|
||||
)?;
|
||||
assert_eq!(
|
||||
parsed,
|
||||
serde_json::json!({
|
||||
"name": "view_image",
|
||||
"description": "View a local image from the filesystem (only use if given a full filepath by the user, and the image isn't already attached to the thread context within <image ...> tags).\n\nexec tool declaration:\n```ts\ndeclare const tools: { view_image(args: { path: string; }): Promise<{ detail: string | null; image_url: string; }>; };\n```",
|
||||
"description": "View a local image from the filesystem (only use if given a full filepath by the user, and the image isn't already attached to the thread context within <image ...> tags).\n\nexec tool declaration:\n```ts\ndeclare const functions: { view_image(args: { path: string; }): Promise<{ detail: string | null; image_url: string; }>; };\n```",
|
||||
})
|
||||
);
|
||||
|
||||
@@ -2225,32 +2225,32 @@ async fn code_mode_exports_all_tools_metadata_for_namespaced_mcp_tools() -> Resu
|
||||
|
||||
let server = responses::start_mock_server().await;
|
||||
let code = r#"
|
||||
const tool = ALL_TOOLS.find(
|
||||
const tool = ALL_FUNCTIONS.find(
|
||||
({ name }) => name === "mcp__rmcp__echo"
|
||||
);
|
||||
text(JSON.stringify(tool));
|
||||
"#;
|
||||
|
||||
let (_test, second_mock) =
|
||||
run_code_mode_turn_with_rmcp(&server, "use exec to inspect ALL_TOOLS", code).await?;
|
||||
run_code_mode_turn_with_rmcp(&server, "use exec to inspect ALL_FUNCTIONS", code).await?;
|
||||
|
||||
let req = second_mock.single_request();
|
||||
let (output, success) = custom_tool_output_body_and_success(&req, "call-1");
|
||||
assert_ne!(
|
||||
success,
|
||||
Some(false),
|
||||
"exec ALL_TOOLS MCP lookup failed unexpectedly: {output}"
|
||||
"exec ALL_FUNCTIONS MCP lookup failed unexpectedly: {output}"
|
||||
);
|
||||
|
||||
let parsed: Value = serde_json::from_str(
|
||||
&custom_tool_output_last_non_empty_text(&req, "call-1")
|
||||
.expect("exec ALL_TOOLS MCP lookup should emit JSON"),
|
||||
.expect("exec ALL_FUNCTIONS MCP lookup should emit JSON"),
|
||||
)?;
|
||||
assert_eq!(
|
||||
parsed,
|
||||
serde_json::json!({
|
||||
"name": "mcp__rmcp__echo",
|
||||
"description": "Echo back the provided message and include environment data.\n\nexec tool declaration:\n```ts\ndeclare const tools: { mcp__rmcp__echo(args: { env_var?: string; message: string; }): Promise<{ _meta?: unknown; content: Array<unknown>; isError?: boolean; structuredContent?: unknown; }>; };\n```",
|
||||
"description": "Echo back the provided message and include environment data.\n\nexec tool declaration:\n```ts\ndeclare const functions: { mcp__rmcp__echo(args: { env_var?: string; message: string; }): Promise<{ _meta?: unknown; content: Array<unknown>; isError?: boolean; structuredContent?: unknown; }>; };\n```",
|
||||
})
|
||||
);
|
||||
|
||||
@@ -2291,8 +2291,8 @@ async fn code_mode_can_call_hidden_dynamic_tools() -> Result<()> {
|
||||
test.session_configured = new_thread.session_configured;
|
||||
|
||||
let code = r#"
|
||||
const tool = ALL_TOOLS.find(({ name }) => name === "hidden_dynamic_tool");
|
||||
const out = await tools.hidden_dynamic_tool({ city: "Paris" });
|
||||
const tool = ALL_FUNCTIONS.find(({ name }) => name === "hidden_dynamic_tool");
|
||||
const out = await functions.hidden_dynamic_tool({ city: "Paris" });
|
||||
text(
|
||||
JSON.stringify({
|
||||
name: tool?.name ?? null,
|
||||
@@ -2324,7 +2324,7 @@ text(
|
||||
test.codex
|
||||
.submit(Op::UserTurn {
|
||||
items: vec![UserInput::Text {
|
||||
text: "use exec to inspect and call hidden tools".into(),
|
||||
text: "use exec to inspect and call hidden functions".into(),
|
||||
text_elements: Vec::new(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
@@ -2396,7 +2396,7 @@ text(
|
||||
.and_then(Value::as_str)
|
||||
.is_some_and(|description| {
|
||||
description.contains("A hidden dynamic tool.")
|
||||
&& description.contains("declare const tools:")
|
||||
&& description.contains("declare const functions:")
|
||||
&& description.contains("hidden_dynamic_tool(args:")
|
||||
})
|
||||
);
|
||||
@@ -2410,7 +2410,7 @@ async fn code_mode_can_print_content_only_mcp_tool_result_fields() -> Result<()>
|
||||
|
||||
let server = responses::start_mock_server().await;
|
||||
let code = r#"
|
||||
const { content, structuredContent, isError } = await tools.mcp__rmcp__image_scenario({
|
||||
const { content, structuredContent, isError } = await functions.mcp__rmcp__image_scenario({
|
||||
scenario: "text_only",
|
||||
caption: "caption from mcp",
|
||||
});
|
||||
@@ -2453,7 +2453,7 @@ async fn code_mode_can_print_error_mcp_tool_result_fields() -> Result<()> {
|
||||
|
||||
let server = responses::start_mock_server().await;
|
||||
let code = r#"
|
||||
const { content, structuredContent, isError } = await tools.mcp__rmcp__echo({});
|
||||
const { content, structuredContent, isError } = await functions.mcp__rmcp__echo({});
|
||||
const firstText = content[0]?.text ?? "";
|
||||
const mentionsMissingMessage =
|
||||
firstText.includes("missing field") && firstText.includes("message");
|
||||
|
||||
Reference in New Issue
Block a user